Example #1
File: panel.py Project: A02l01/Navautron
	def find_grid(self,inp):
		dx,dy = [],[]
		outx,outy = [],[]
		for i in inp:
			dy.append([i[2],0])
		data = np.vstack(dy)
		centroids,_ = kmeans(data,self.Ny)
		idx,_ = vq(data,centroids)
		for ii in range(0,self.Ny):
			mini = np.min(data[idx==ii,0])
			maxi = np.max(data[idx==ii,0])
			outy.append([mini,maxi])
		outy = sorted(outy[:],key=lambda s:s[1])
		for i in inp:
			dx.append([i[1],0])
		data = np.vstack(dx)
		centroids,_ = kmeans(data,self.Nx)
		idx,_ = vq(data,centroids)
		for ii in range(0,self.Nx):
			mini = np.min(data[idx==ii,0])
			maxi = np.max(data[idx==ii,0])
			outx.append([mini,maxi])
		outx = sorted(outx[:],key=lambda s:s[1])
		
		out = []
		for y in range(0,len(outy)):
			for x in range(0,len(outx)):
				for k in range(0,len(inp)):
					if (inp[k][1]>=outx[x][0]) and (inp[k][1]<=outx[x][1]) and (inp[k][2]>=outy[y][0]) and (inp[k][2]<=outy[y][1]):
						out.append(inp[k])
		return out
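A minimal, self-contained sketch of the kmeans/vq pattern that find_grid relies on: cluster 1-D coordinates, then assign each point to its nearest centroid (synthetic data, not the Navautron panel input):

import numpy as np
from scipy.cluster.vq import kmeans, vq

# three horizontal bands of y-coordinates, like the rows find_grid looks for
ys = np.concatenate([np.random.normal(mu, 2.0, 50) for mu in (10.0, 50.0, 90.0)])
data = np.column_stack([ys, np.zeros_like(ys)])  # pad with a zero column, as the example does

centroids, _ = kmeans(data, 3)   # fit 3 cluster centers
idx, _ = vq(data, centroids)     # nearest-centroid index for every point

for c in range(3):
    band = data[idx == c, 0]
    print(c, band.min(), band.max())  # the [min, max] extents the example collects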
Example #2
File: vq.py Project: kamirow/Slicer4
def compare(m, Nobs, Ncodes, Nfeatures):
    obs = RandomArray.normal(0., 1., (Nobs, Nfeatures))
    codes = RandomArray.normal(0., 1., (Ncodes, Nfeatures))
    import scipy.cluster.vq
    print 'vq with %d observations, %d features and %d codes for %d iterations' % \
           (Nobs,Nfeatures,Ncodes,m)
    t1 = time.time()
    for i in range(m):
        code, dist = scipy.cluster.vq.py_vq(obs, codes)
    t2 = time.time()
    py = (t2 - t1)
    print ' speed in python:', (t2 - t1) / m
    print code[:2], dist[:2]

    t1 = time.time()
    for i in range(m):
        code, dist = scipy.cluster.vq.vq(obs, codes)
    t2 = time.time()
    print ' speed in standard c:', (t2 - t1) / m
    print code[:2], dist[:2]
    print ' speed up: %3.2f' % (py / (t2 - t1))

    # load into cache
    b = vq(obs, codes)
    t1 = time.time()
    for i in range(m):
        code, dist = vq(obs, codes)
    t2 = time.time()
    print ' speed inline/blitz:', (t2 - t1) / m
    print code[:2], dist[:2]
    print ' speed up: %3.2f' % (py / (t2 - t1))

    # load into cache
    b = vq2(obs, codes)
    t1 = time.time()
    for i in range(m):
        code, dist = vq2(obs, codes)
    t2 = time.time()
    print ' speed inline/blitz2:', (t2 - t1) / m
    print code[:2], dist[:2]
    print ' speed up: %3.2f' % (py / (t2 - t1))

    # load into cache
    b = vq3(obs, codes)
    t1 = time.time()
    for i in range(m):
        code, dist = vq3(obs, codes)
    t2 = time.time()
    print ' speed using C arrays:', (t2 - t1) / m
    print code[:2], dist[:2]
    print ' speed up: %3.2f' % (py / (t2 - t1))
Example #3
def k_mean_plot_AMN(c, folder, list_vectors_ANM):
    """Creates a k-means clustering, mainly for clustering DCD trajectories"""
    # Fixed number of variables = number of PDB-DCD pairs = 4 in our case
    var1 = list_vectors_ANM[0]
    var2 = list_vectors_ANM[1]
    var3 = list_vectors_ANM[2]
    var4 = list_vectors_ANM[3]

    features = np.array([])
    features = np.append(features, var1)
    features = np.append(features, var2)
    features = np.append(features, var3)
    features = np.append(features, var4)

    centroids, variance = kmeans(features, c)
    code, distance = vq(features, centroids)

    for j in range(len(var1) - 1):
        pylab.plot([p[j] for p in var1], [p[j+1] for p in var1], '*')
        pylab.plot([p[j] for p in var2], [p[j+1] for p in var2], 'r*')
        pylab.plot([p[j] for p in var3], [p[j+1] for p in var3], 'y*')
        pylab.plot([p[j] for p in var4], [p[j+1] for p in var4], 'g*')
    #~ pylab.plot([p[0] for p in centroids],[p[1] for p in centroids],'go')
    pylab.plot(centroids, centroids, 'go')
    pylab.savefig("./" + folder + "/kmeans_ANMnalysis.png")
Example #4
def clusterDataSpec(data, k, algorithm):
    '''
    Cluster the given data into a number of clusters determined by BIC.
    @param data: 2D numpy array holding our data.
    @param k: number of clusters.
    @param algorithm: the clustering algorithm, either "k-means" or "GMM".
    @raise LogicalError if algorithm is other than "k-means" or "GMM"
    @return The predicted labels (clusters) for every example.
    '''
    
    if algorithm not in ["k-means", "GMM"]:
        raise LogicalError, "Method %s: Clustering is made only through K-means or GMM." %(stack()[0][3])
    
    print "Clustering for k=%d." %(k) 
    if algorithm == "k-means":
        data = whiten(data) # whiten returns a rescaled copy; capture it
        codebook, _distortion = kmeans(data, k, 10) # 10 iterations only to make it faster
    else:
        g = GMM(n_components=k,thresh = 1e-05, covariance_type='diag', n_iter=10)
        g.fit(data)
            
    #print "Optimal number of clusters according to BIC: %d." %(optimalK)
    
    # Return predicted labels
    if algorithm == "k-means":
        return vq(data, codebook)[0] # predictions on the same data
    else:
        return g.predict(data) # predictions on the same data
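Note that scipy's whiten returns a rescaled copy rather than modifying its argument, which is why the fixed code above reassigns data. A minimal sketch on synthetic data:

import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

data = np.random.rand(200, 2) * [1.0, 100.0]  # two features on very different scales
data = whiten(data)                           # rescale each column to unit variance
codebook, _distortion = kmeans(data, 3, 10)
labels = vq(data, codebook)[0]                # cluster index per observation
print(labels[:10])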
Example #5
def GetPupilKMeans(gray, K = 2, distanceWeight = 2, reSize = (40,40)):
	
	smallI = cv2.resize(gray, reSize)
	
	M,N = smallI.shape

	X,Y = np.meshgrid(range(M), range(N))

	z = smallI.flatten()
	x = X.flatten()
	y = Y.flatten()
	O = len(x)

	features = np.zeros((O, 3))
	features[:,0] = z
	features[:,1] = y / distanceWeight
	features[:,2] = x / distanceWeight

	features = np.array(features, 'f')

	centroids, variance = kmeans(features, K)
	print(centroids)
	label, distance = vq(features, centroids)

	labelIm = np.array(np.reshape(label, (M, N)))

	f = figure(1)
	imshow(labelIm)
	f.canvas.draw()
	f.show()
Example #6
    def BOWMatch(self, indexPath):
        '''the query's score against an individual index'''
        # start = time.time()
        query_des_list = []
        im_features, image_paths, idf, numWords, voc = joblib.load(indexPath)
        numWords = self.numWords

        desc = cv2.xfeatures2d.SIFT_create()
        # Extract the descriptors from the query 
        query = self.image
        kp, des = desc.detectAndCompute(query, None)
        query_des_list.append((query, des))

        # Stack query descriptors in a numpy array
        query_descriptors = query_des_list[0][1]

        # Calculate histogram of Features for the Query 
        test_features = np.zeros((1, numWords), "float32")
        words, distance = vq(query_descriptors, voc)
        for w in words:
            test_features[0][w] += 1 

        # Perform Tf-idf vectorization for the Query
        test_features = test_features * idf
        test_features = preprocessing.normalize(test_features, norm='l2')

        score = np.dot(test_features, im_features.T)
        return score
Example #7
def getBOVW(sift_key):
    global codebook
    new_line = ""
    sift_key_points = []

    lines = sift_key.readlines()
    lines = lines[1:]
    for i in range(len(lines)):
        if (i % 8) == 0:
            if new_line != "":
                new_line = new_line.strip()
                tokens = new_line.split()
                tokens = map(int, tokens)
                sift_key_points.append(tokens)
            new_line = ""
        else:
            new_line += (lines[i].strip() + ' ')

    sift_key_points = np.array(sift_key_points)
    codebook = np.array(codebook)
    idx, _ = vq(sift_key_points, codebook)

    BOVW = []
    idx = list(idx)
    for i in range(k):
        # vq returns 0-based codebook indices, so count cluster i rather than i+1
        BOVW.append(idx.count(i) / float(len(sift_key_points)))

    return BOVW
Example #8
def detectPupilKMeans(gray,K=4,distanceWeight=1,reSize=(30,30)):
    smallI = cv2.resize(gray, reSize)
    M,N = smallI.shape
    X,Y = np.meshgrid(range(M),range(N))

    z = smallI.flatten()
    x = X.flatten()
    y = Y.flatten()
    O = len(x)

    #make a feature vector containing (x,y,intensity)
    features = np.zeros((O,3))
    features[:,0] = z
    features[:,1] = y/distanceWeight #Divide so that the distance of position weighs less
    features[:,2] = x/distanceWeight
    features = np.array(features,'f')
    # cluster data
    centroids,variance = kmeans(features,K)
    #use the found clusters to map
    label,distance = vq(features,centroids)
    # re-create image from
    labelIm = np.array(np.reshape(label,(M,N)))

    # Find the lowest valued class
    thr = 255
    for i in range(K):
        if(centroids[i][0] < thr):
            thr = centroids[i][0]

    return thr
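The returned thr is the intensity of the darkest cluster center, which is naturally used as a binarization threshold. A hedged usage sketch, assuming detectPupilKMeans from Example #8 is in scope (the random image is only a stand-in for a real eye image):

import cv2
import numpy as np

gray = np.random.randint(0, 256, (120, 160), np.uint8)  # stand-in grayscale image
thr = detectPupilKMeans(gray, K=4, distanceWeight=1, reSize=(30, 30))
# pixels darker than the darkest cluster center are pupil candidates
_, pupil_mask = cv2.threshold(gray, thr, 255, cv2.THRESH_BINARY_INV)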
Example #9
def buildHistogramForVideo(pathToVideo, vocabulary):
    frames = os.listdir(pathToVideo)
    size = len(vocabulary)

    stackOfHistogram = np.zeros(size).reshape(1, size)
    for frame in frames:
        # build histogram for this frame
        completePath = pathToVideo +"/"+ frame
        lines = open(completePath, "r").readlines()

        print completePath

        frameFeatures = np.zeros(128).reshape(1, 128)
        for line in lines[1:]:
            data = line.split(" ")
            feature = data[4:]

            for i in range(len(feature)):
                item = int(feature[i])
                feature[i] = item

            feature = normalizeSIFT(feature)
            frameFeatures = np.vstack((frameFeatures, feature))

        frameFeatures = frameFeatures[1:]
        codes, distance = vq(frameFeatures, vocabulary)

        histogram = np.zeros(size)
        for code in codes:
            histogram[code] += 1

        stackOfHistogram = np.vstack((stackOfHistogram, histogram.reshape(1,size)))

    return stackOfHistogram[1:]
Example #10
def detectPupilKMeans(gray,K=2,distanceWeight=2,reSize=(40,40)):
	''' Detects the pupil in the image, gray, using k-means
			gray              : grayscale image
			K                 : Number of clusters
			distanceWeight    : Defines the weight of the position parameters
			reSize            : the size of the image to do k-means on
	'''
	#Resize for faster performance
	smallI = cv2.resize(gray, reSize)
	M,N = smallI.shape
	#Generate coordinates in a matrix
	X,Y = np.meshgrid(range(M),range(N))
	#Make coordinates and intensity into vectors
	z = smallI.flatten()
	x = X.flatten()
	y = Y.flatten()
	O = len(x)
	#make a feature vector containing (x,y,intensity)
	features = np.zeros((O,3))
	features[:,0] = z
	features[:,1] = y/distanceWeight #Divide so that the distance of position weighs less than intensity
	features[:,2] = x/distanceWeight
	features = np.array(features,'f')
	# cluster data
	centroids,variance = kmeans(features,K)
	#use the found clusters to map
	label,distance = vq(features,centroids)
	# re-create image from
	labelIm = np.array(np.reshape(label,(M,N)))
	f = figure(1)
	imshow(labelIm)
	f.canvas.draw()
	f.show()
Example #11
def buildVLADForEachImageAtDifferentLevels(descriptorsOfImage, level):

    # Set width and height
    width = descriptorsOfImage.width
    height = descriptorsOfImage.height
    # calculate width and height step
    widthStep = int(width / 2)
    heightStep = int(height / 2)

    descriptors = descriptorsOfImage.descriptors

    # level 1, a list with size = 4 to store histograms at different location
    VLADOfLevelOne = np.zeros((4, k, dim))
    for descriptor in descriptors:
        x = descriptor.x
        y = descriptor.y
        boundaryIndex = int(x / widthStep) + int(y / heightStep) * 2  # 2x2 grid: column + 2*row

        feature = descriptor.descriptor
        shape = feature.shape[0]
        feature = feature.reshape(1, shape)

        codes, distance = vq(feature, k_means.cluster_centers_)
        
        VLADOfLevelOne[boundaryIndex][codes[0]] += np.array(feature).reshape(shape) - k_means.cluster_centers_[codes[0]]
    
    
    for i in xrange(4):
        # Square root norm
        VLADOfLevelOne[i] = np.sign(VLADOfLevelOne[i]) * np.sqrt(np.abs(VLADOfLevelOne[i]))
        # Local L2 norm
        vector_norm = np.linalg.norm(VLADOfLevelOne[i], axis = 1)
        vector_norm[vector_norm < 1] = 1
        
        VLADOfLevelOne[i] /= vector_norm[:, None]
    
    # level 0
    VLADOfLevelZero = VLADOfLevelOne[0] + VLADOfLevelOne[1] + VLADOfLevelOne[2] + VLADOfLevelOne[3]
    # Square root norm
    VLADOfLevelZero = np.sign(VLADOfLevelZero) * np.sqrt(np.abs(VLADOfLevelZero))
    # Local L2 norm
    vector_norm = np.linalg.norm(VLADOfLevelZero, axis = 1)
    vector_norm[vector_norm < 1] = 1
    
    VLADOfLevelZero /= vector_norm[:, None]
    
    if level == 0:
        return VLADOfLevelZero

    elif level == 1:
        tempZero = VLADOfLevelZero.flatten() * 0.5
        tempOne = VLADOfLevelOne.flatten() * 0.5
        result = np.concatenate((tempZero, tempOne))
        # Global L2 norm
        norm = np.linalg.norm(result)
        if norm > 1.0:
            result /= norm
        return result
    else:
        return None
Example #12
  def project(self,descriptors):
    """ Project the descriptors onto the vocabulary
        to create a histogram of words """
    #drawing = zeros((1000,1000))    
    dic = {}
    # histogram of visual words
    imhist = zeros((self.nbr_words))
    words,distance = vq(descriptors,self.voc)
    """
    tmp = list(set(words)) # get words with duplicates removed
    words = np.array(words)
    index = []
    for t in tmp:
      tmp_d = []
      index.append( np.where( words == t)[0] )
      for i in index:
        tmp_d.append([pointors[i,:]])
      dic[t] = tmp_d
      tmp_d = np.array(dic[t])
      dic[t] = np.sort(tmp_d, axis = 0)
      print dic[t]
      cv2.drawContours(drawing,dic[t],0,(0,255 -t,0),2)
    
    cv2.imshow( "Result", drawing )
    cv2.waitKey()  
    cv2.destroyAllWindows()
    """
    for w in words:
      imhist[w] += 1

    return imhist
Example #13
File: ncut.py Project: Adon-m/PCV
def cluster(S,k,ndim):
    """ Spectral clustering from a similarity matrix."""
    
    # check for symmetry
    if sum(abs(S-S.T)) > 1e-10:
        print 'not symmetric'
    
    # create Laplacian matrix
    rowsum = sum(abs(S),axis=0)
    D = diag(1 / sqrt(rowsum + 1e-6))
    L = dot(D,dot(S,D))
    
    # compute eigenvectors of L
    U,sigma,V = linalg.svd(L,full_matrices=False)
    
    # create feature vector from ndim first eigenvectors
    # by stacking eigenvectors as columns
    features = array(V[:ndim]).T

    # k-means
    features = whiten(features)
    centroids,distortion = kmeans(features,k)
    code,distance = vq(features,centroids)
        
    return code,V
Example #14
def cluster(S,k,ndim):
  """ Perform spectral clustering from a similarity matrix """

  # check for symmetry
  if sum(abs(S-S.T)) > 1e-10:
    print 'not symmetric'

  # create the Laplacian matrix
  rowsum = sum(abs(S),axis=0)
  D = diag(1 / sqrt(rowsum + 1e-6))
  L = dot(D,dot(S,D))

  # compute the eigenvectors of L
  U,sigma,V = linalg.svd(L)

  # create a feature vector from the top ndim
  # eigenvectors, stacked as columns
  features = array(V[:ndim]).T

  # k-means
  features = whiten(features)
  centroids,distortion = kmeans(features,k)
  code,distance = vq(features,centroids)

  return code,V
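A small self-contained run of the same spectral pipeline, building a Gaussian similarity matrix for two point blobs (a numpy-only reconstruction of the steps above, not the book's test code):

import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

pts = np.vstack([np.random.randn(20, 2), np.random.randn(20, 2) + 5])
d2 = ((pts[:, None, :] - pts[None, :, :]) ** 2).sum(-1)
S = np.exp(-d2 / 2.0)                      # symmetric similarity matrix

rowsum = np.abs(S).sum(axis=0)
D = np.diag(1 / np.sqrt(rowsum + 1e-6))
L = D.dot(S).dot(D)                        # normalized Laplacian, as in cluster()
U, sigma, V = np.linalg.svd(L)
features = whiten(np.array(V[:2]).T)       # top-2 eigenvectors as features

centroids, _ = kmeans(features, 2)
labels, _ = vq(features, centroids)
print(labels)                              # the two blobs separate cleanly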
Example #15
def classify(im):
    if im is None:
        print "No such file {}\nCheck if the file exists".format(image_path)
        return -1

    # Load the classifier, class names, scaler, number of clusters and vocabulary
    clf, classes_names, stdSlr, k, voc = joblib.load("bow.pkl")

    sift = cv2.xfeatures2d.SIFT_create()

    kpts, des = sift.detectAndCompute(im, None)

    test_features = np.zeros((1, k), "float32")

    # words, distance = vq(des_list[0][1],voc)
    words, distance = vq(des, voc)
    for w in words:
        test_features[0][w] += 1

    # Perform tf-idf vectorization
    nbr_occurences = np.sum((test_features > 0) * 1, axis=0)
    idf = np.array(np.log((1.0 * 1 + 1) / (1.0 * nbr_occurences + 1)), "float32")

    # Scale the features
    test_features = stdSlr.transform(test_features)

    # Perform the predictions
    predictions = [classes_names[i] for i in clf.predict(test_features)]
    return predictions
Example #16
def predictor(im, w, queue):
    global fea_det
    global step_size
    global k
    global voc
    global clf
    global classes_names
    global stdSlr
    global image_paths
    best = 0
    predictions = None  # stays None if no window ever qualifies
    for (x_pt, y_pt, window) in sliding_window(im, stepSize=16, windowSize=(w,w)):
        if window.shape[0] != w or window.shape[1] != w:
            continue
        kpts = [cv2.KeyPoint(x, y, step_size) for y in range(0, window.shape[0], step_size)
                                              for x in range(0, window.shape[1], step_size)]
        (kpts, des) = fea_det.compute(window, kpts) # compute dense descriptors
        des = whiten(des)
        test_features = np.zeros((len(image_paths), k), "float32")
        words, L2distance = vq(des, voc)
        for wd in words:
            test_features[0][wd] += 1
        nbr_occurences = np.sum( (test_features > 0) * 1, axis = 0)
        idf = np.array(np.log((1.0*len(image_paths)+1) / (1.0*nbr_occurences + 1)), 'float32')
        test_features = stdSlr.transform(test_features)
        probs = np.array(clf.predict_proba(test_features))
        ind = np.argmax(probs)
        max_prob = np.max(probs)
        if max_prob > best:
            predictions = (classes_names[ind], max_prob)
            best = max_prob
            #print(predictions)
    queue.put(predictions)
Example #17
def imgFeatExtract(image_paths):
    # Create feature extraction and keypoint detector objects
    #surf = cv2.SURF()

    # Extract features, combine with image storage location
    des_list = []
    count = 1
    for image_path in image_paths:
        if ".jpg" in image_path:
            print 'processing image %s: \n%s' %(count, image_path)
            im = cv2.imread(image_path, 1) #read in image
            im = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY) #convert to grayscale
            im = cv2.resize(im, (im.shape[1],300)) #normalize shape
            sift_ocl = sift.SiftPlan(template=im, devicetype='GPU2')
            des = sift_ocl.keypoints(im)
            des = np.asarray([des[i][4] for i in xrange(len(des))])
            des = np.float32(des)
            ###deleted because of memory leak in cv2###
            #_, des = surf.detectAndCompute(im, None)
            des_list.append((image_path, des))
            count+=1

    # Stack all the descriptors vertically in a numpy array
    print 'stacking descriptor features in numpy array'
    count=1    
    descriptors = des_list[0][1]
    for image_path, descriptor in des_list[1:]:
        try:        
            if ".jpg" in image_path:
                print 'stacking image %s: \n%s' %(count, image_path)
                descriptors = np.vstack((descriptors, descriptor))
                count+=1
        except:
            print 'error! image %s: wrong size \n%s' %(count, image_path)
            pass
    
    #vocabulary = cluster centroids
    k=imgVoc #number of clusters
    print('performing image feature clustering K=%s' %k)
    voc, variance = kmeans(descriptors, k, 1) #voc = visual vocabulary

    # Calculate frequency vector
    print('creating img frequency vector')
    im_features = np.zeros((len(image_paths), k), "float32")
    for i in xrange(len(des_list)):  # iterate over the stacked .jpg entries themselves
        words, distance = vq(des_list[i][1],voc)
        for w in words:
            im_features[i][w] += 1

    # Standardization for input to linear classifier
    print('standardizing img input for classification')
    stdSlr = StandardScaler().fit(im_features)
    im_features = stdSlr.transform(im_features)
    
    #save image classifier
    #joblib.dump((clf, training_names, stdSlr, k, voc), "imgclf.pkl", compress=3)    

    return(im_features,voc)
Example #18
 def project(self, descript):
     """将描述子投影到词汇上,以创建单词直方图"""
     # 图像单词直方图
     imhist = np.zeros((self.nbr_words))
     words, distance = vq(descript, self.voc)
     for w in words:
         imhist[w] += 1
     return imhist
Example #19
File: Vocabulary.py Project: BioSok/repo
 def project(self, descriptors):
     """ project descriptors on the vocabulary
             to create a histogram of words"""
     imhist = np.zeros((self.nbr_words))
     words, distance = vq(descriptors, self.voc)
     for w in words:
         imhist[w] += 1
     return imhist
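The word-counting loop in these project() helpers is equivalent to a one-line numpy histogram; a hedged sketch of that shortcut on stand-in data:

import numpy as np
from scipy.cluster.vq import vq

descriptors = np.random.rand(500, 128).astype('float32')  # stand-in SIFT descriptors
voc = np.random.rand(100, 128).astype('float32')          # stand-in vocabulary

words, _ = vq(descriptors, voc)
imhist = np.bincount(words, minlength=len(voc))  # same counts as the for-loop above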
Example #20
def get_nearest(all_coords, proj, centroid):
    #find which frame in trajectory is the closest to the desired deformation
    #the first centroid is kept as reference (should replace 0 by an iterator, when using multiple centroids)
    pos=proj[:,centroid]+deform_coeffs
    code,min_dist=vq(proj.transpose(),np.array([pos]))
    target_frame=min_dist.argmin()

    return all_coords[:,target_frame]
Example #21
    def buildHistogram(self, imageFeature, vocabulary):
        vocSize = len(vocabulary)
        histogram = np.zeros(vocSize)

        codes, distance = vq(imageFeature, vocabulary)
        for code in codes:
            histogram[code] += 1

        return histogram
Example #22
    def buildHistogramForEachImageAtDifferentLevels(self, descriptorsOfImage, level):

        width = descriptorsOfImage.width
        height = descriptorsOfImage.height
        widthStep = int(width / 4)
        heightStep = int(height / 4)

        descriptors = descriptorsOfImage.descriptors


        # level 2, a list with size = 16 to store histograms at different location
        histogramOfLevelTwo = np.zeros((16, self.size))
        for descriptor in descriptors:
            x = descriptor.x
            y = descriptor.y
            boundaryIndex = int(x / widthStep)  + int(y / heightStep) *4

            feature = descriptor.descriptor
            shape = feature.shape[0]
            feature = feature.reshape(1, shape)

            codes, distance = vq(feature, self.vocabulary)
            # print "W:", width, height, widthStep, heightStep, boundaryIndex, x, y
            if boundaryIndex < 16:
                histogramOfLevelTwo[boundaryIndex][codes[0]] += 1

        # level 1, based on histograms generated on level two
        histogramOfLevelOne = np.zeros((4, self.size))
        histogramOfLevelOne[0] = histogramOfLevelTwo[0] + histogramOfLevelTwo[1] + histogramOfLevelTwo[4] + histogramOfLevelTwo[5]
        histogramOfLevelOne[1] = histogramOfLevelTwo[2] + histogramOfLevelTwo[3] + histogramOfLevelTwo[6] + histogramOfLevelTwo[7]
        histogramOfLevelOne[2] = histogramOfLevelTwo[8] + histogramOfLevelTwo[9] + histogramOfLevelTwo[12] + histogramOfLevelTwo[13]
        histogramOfLevelOne[3] = histogramOfLevelTwo[10] + histogramOfLevelTwo[11] + histogramOfLevelTwo[14] + histogramOfLevelTwo[15]

        # level 0
        histogramOfLevelZero = histogramOfLevelOne[0] + histogramOfLevelOne[1] + histogramOfLevelOne[2] + histogramOfLevelOne[3]


        if level == 0:
            return histogramOfLevelZero

        elif level == 1:
            tempZero = histogramOfLevelZero.flatten() * 0.5
            tempOne = histogramOfLevelOne.flatten() * 0.5
            result = np.concatenate((tempZero, tempOne))
            return result

        elif level == 2:

            tempZero = histogramOfLevelZero.flatten() * 0.25
            tempOne = histogramOfLevelOne.flatten() * 0.25
            tempTwo = histogramOfLevelTwo.flatten() * 0.5
            result = np.concatenate((tempZero, tempOne, tempTwo))
            return result

        else:
            return None
Example #23
def theclust(vgm):
	############################################
	sfeature = np.vstack((vgm.spos))
	centroids,variance = kmeans(sfeature,vgm.scnum)
	code,distance = vq(sfeature,centroids)
	#plt.figure()
	for sciter in range(vgm.scnum):
		ndx = np.where(code==sciter)[0]
		for iter in range(len(sfeature[ndx,0])):
			tempos = [sfeature[ndx,0][iter],sfeature[ndx,1][iter]]
			vgm.scvec[sciter].append(FindSIndexByPos(vgm,tempos))
		#plt.scatter(sfeature[ndx,0],sfeature[ndx,1],c=np.random.random(size=3))
	##############################################

	############################################
	tfeature = np.vstack((vgm.tpos))
	centroids,variance = kmeans(tfeature,vgm.tcnum)
	code,distance = vq(tfeature,centroids)
	#plt.figure()
	for tciter in range(vgm.tcnum):
		ndx = np.where(code==tciter)[0]
		for iter in range(len(tfeature[ndx,0])):
			tempos = [tfeature[ndx,0][iter],tfeature[ndx,1][iter]]
			vgm.tcvec[tciter].append(FindTIndexByPos(vgm,tempos))
		#plt.scatter(tfeature[ndx,0],tfeature[ndx,1],c=np.random.random(size=3))
	##############################################

	############################################
	dfeature = np.vstack((vgm.dpos))
	centroids,variance = kmeans(dfeature,vgm.dcnum)
	code,distance = vq(dfeature,centroids)
	#plt.figure()
	for dciter in range(vgm.dcnum):
		ndx = np.where(code==dciter)[0]
		for iter in range(len(dfeature[ndx,0])):
			tempos = [dfeature[ndx,0][iter],dfeature[ndx,1][iter]]
			vgm.dcvec[dciter].append(FindDIndexByPos(vgm,tempos))
		#plt.scatter(dfeature[ndx,0],dfeature[ndx,1],c=np.random.random(size=3))
	##############################################

	#plt.axis('off')
	#plt.show()
	return vgm
Example #24
def fetchClass(imagetoClassify):
    kpts, des = sift.detectAndCompute(imagetoClassify, None)
    test_features = np.zeros((1, k), "float32")
    words, distance = vq(des,voc)
    for w in words:
        test_features[0][w] += 1
    #nbr_occurences = np.sum( (test_features > 0) * 1, axis = 0)
    #idf = np.array(np.log((1.0+1) / (1.0*nbr_occurences + 1)), 'float32')
    test_features = stdSlr.transform(test_features)
    predictions =  [classes_names[i] for i in clf.predict(test_features)]
    return predictions           
Example #25
def TestSampleFeaturesGenerator(image_path):
	stdSlr, k, voc = joblib.load("bof.pkl")

	image_paths = imutils.imlist(image_path)
# List where all the descriptors are stored
	des_list = []
	HH = []
	for image_path in image_paths:
	    im = cv2.imread(image_path)
	    if im is None:
	        print "No such file {}\nCheck if the file exists".format(image_path)
	        exit()
	    kpts, des = sift.detectAndCompute(im, None)
	    hsv = cv2.cvtColor(im,cv2.COLOR_BGR2HSV)
	    kernel = np.ones((50,50),np.float32)/2500
	    hsv = cv2.filter2D(hsv,-1,kernel)
	    h_hue = cv2.calcHist( [hsv], [0], None, [180], [0, 180] )
	    H = []
	    n_hue = sum(h_hue)
	    for h in h_hue:
	        hh = np.float32(float(h)/float(n_hue))
	        H.append(hh)
	    
	    h_sat = cv2.calcHist( [hsv], [1], None, [256], [0, 256] )
	    n_sat = sum(h_sat)
	    for h in h_sat:
	        hh = np.float32(float(h)/float(n_sat))
	        H.append(hh) 
	    HH.append(H)
	    des_list.append((image_path, des))   

	# Stack all the descriptors vertically in a numpy array
	# print des_list
	descriptors = des_list[0][1]
	for image_path, descriptor in des_list[1:]:  # start at 1: index 0 is already stacked
	    descriptors = np.vstack((descriptors, descriptor))
	# 
	test_features = np.zeros((len(image_paths), k), "float32")
	for i in xrange(len(image_paths)):
	    words, distance = vq(des_list[i][1],voc)
	    for w in words:
	        test_features[i][w] += 1

	# Scale the features
	test_features = stdSlr.transform(test_features)
	test_features = np.append(test_features, HH, axis = 1)
	fl = open('TestFeature.csv', 'w')

	writer = csv.writer(fl)
	for values in test_features:
	    writer.writerow(values)

	fl.close() 
	return test_features
Example #26
 def project(self,descriptors):
     """ Project the descriptors onto the vocabulary
         to create a histogram of words"""

     # histogram of visual words
     imhist = zeros((self.nbr_words))
     words,distance = vq(descriptors,self.voc)
     for w in words:
         imhist[w] += 1
     
     return imhist
Example #27
  def project(self,descriptors):
	""" Project descriptors on the vocab
	  to create a histogram of words """

	# histogram of image words
	imhist = zeros((self.nbr_words))
	words,distance = vq(descriptors,self.voc)
	for w in words:
	  imhist[w] += 1

	return imhist
Example #28
File: BoF.py Project: josefien/Project1
 def _createHistOfFeatures(self, descriptors):
     feats = np.zeros((1, self.vocab_size), "float32")
     # Each descriptor in the descriptor list is assigned its nearest visual "word".
     # words is a length M array, where M is the number of descriptors for
     # the given image. Each entry in words stores an index to the nearest
     # visual word in the vocabulary.
     words, distance = vq(descriptors,self.vocab)
     # for each vocabulary index in words, increment the count for that word
     # in the histogram
     for w in words:
         feats[0][w] += 1
     return feats
Example #29
File: HW2.py Project: mfintz/CV
def getImagesBOWs(image_paths, extractor, quantization=[], k=100, iterations=3):
    des_list = []

    for image_path in image_paths:
        print("Reading image from " + image_path)
        img = cv2.imread(image_path)

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        kp, des = extractor.detectAndCompute(gray, None)
        des_list.append((image_path, des))

    #
    # The code below is taken from a snippet from the Internet - grouping
    # descriptors in a stack
    #
    descriptors = des_list[0][1]
    for image_path, descriptor in des_list[1:]:
        descriptors = np.vstack((descriptors, descriptor))

    #
    # Perform k-means clustering by building the vocabulary.
    # There is a way to pass a quantization as a parameter - this will be used
    # when processing the test images.
    # For the test images we want to use the same quantization which was
    # calculated on the DB images (aka training set)
    #
    if quantization == []:
        print("Start building the quantization")
        voc, variance = kmeans(descriptors, k, iterations)
        print("Quantization built")
    else:
        print("Using the preset quantization")
        voc = quantization

    #
    # Calculate the BOWs for each image
    # this small piece of code is also taken from an Internet snippet
    #
    im_features = np.zeros((len(image_paths), k), "float32")
    for i in range(len(image_paths)):
        words, distance = vq(des_list[i][1], voc)
        for w in words:
            im_features[i][w] += 1

    #
    # Scaling was proposed on the Internet.  We may omit this, as it works
    # without it.
    #
    stdSlr = StandardScaler().fit(im_features)
    im_features = stdSlr.transform(im_features)

    return (im_features, voc)
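Because getImagesBOWs accepts a precomputed quantization, the train/test flow the comments describe looks roughly like this (a hedged sketch; the path lists are hypothetical):

import cv2

train_paths = ["db/img%03d.jpg" % i for i in range(100)]    # hypothetical DB images
test_paths = ["query/img%03d.jpg" % i for i in range(10)]   # hypothetical queries

train_bows, voc = getImagesBOWs(train_paths, cv2.xfeatures2d.SIFT_create())
# reuse the vocabulary learned on the DB images when processing the test images
test_bows, _ = getImagesBOWs(test_paths, cv2.xfeatures2d.SIFT_create(), quantization=voc)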
Example #30
    def buildHistogram(self, imageDescriptors):
        histogram = np.zeros(self.size)

        stackFeatures = imageDescriptors[0].descriptor
        for descriptor in imageDescriptors[1:]:
            descriptor = descriptor.descriptor
            stackFeatures = np.vstack((stackFeatures, descriptor))

        codes, distance = vq(stackFeatures, self.vocabulary)

        for code in codes:
            histogram[code] += 1
        return histogram
Example #31
def get_im_features(category, pkl_path, num_words=1000):
    des_list, _ = joblib.load('pkl/des_list_{}.pkl'.format(category))

    # After the change above, there is no longer a need to drop None descriptors
    # for image_path, descriptor in des_list:
    #     if descriptor is None:
    #         des_list.remove((image_path, descriptor))

    image_paths = [x[0] for x in des_list]

    # Stack all the descriptors vertically in a numpy array
    print('-----------------------------------------')
    print('Start stacking of the descriptors......')

    # Method 1: np.vstack
    # i = 0
    # start_time = time.time()
    # descriptors = des_list[0][1]
    # for image_path, descriptor in des_list[1:]:
    #     descriptors = np.vstack((descriptors, descriptor))
    #     i += 1
    #     if i % 100 == 0:
    #         print('[{:.1f}s] The {}/{} descriptors has been stacked.'.format(
    # time.time() - start_time, i, len(des_list)))
    #         start_time = time.time()

    # Method 2: preallocate the array, which is significantly faster
    des_sum = 0
    for image_path, descriptor in des_list:
        des_sum += descriptor.shape[0]
    descriptors = np.zeros((des_sum, 128))

    position = 0
    for image_path, descriptor in des_list:
        sz = len(descriptor)
        descriptors[position:position + sz] = descriptor
        position += sz

    # Perform k-means clustering
    start_time = time.time()
    print('-----------------------------------------')
    print("Start k-means: %d words, %d key points..." % (num_words, descriptors.shape[0]))
    voc, variance = kmeans(descriptors, num_words, 1)
    print('[{:.1f}s] K-means has done.'.format(time.time() - start_time))

    # Calculate the histogram of features
    im_features = np.zeros((len(image_paths), num_words), "float32")
    for i in range(len(image_paths)):
        words, distance = vq(des_list[i][1], voc)
        for w in words:
            im_features[i][w] += 1

    # Perform Tf-Idf vectorization
    nbr_occurrences = np.sum((im_features > 0) * 1, axis=0)
    idf = np.array(np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurrences + 1)), 'float32')

    # Perform L2 normalization
    im_features = im_features * idf
    im_features = preprocessing.normalize(im_features, norm='l2')

    print('-----------------------------------------')
    print('Saving model parameters...')
    joblib.dump((im_features, image_paths, idf, num_words, voc), pkl_path, compress=3)
Example #32
plt.plot(x, norm.pdf(x, mean, std), 'r', label='Normal fit')
plt.legend()
plt.show()
print('#', 50 * "-")
# -----------------------
from scipy.stats import norm
import numpy
from numpy import array, vstack
from scipy.cluster.vq import *

data = norm.rvs(0, 0.3, size=(10000, 2))
inside_ball = numpy.hypot(data[:, 0], data[:, 1]) < 1.0
data = data[inside_ball]
data = vstack((data, data + array([1, 1]), data + array([-1, 1])))

centroids, distortion = kmeans(data, 3)
cluster_assignment, distances = vq(data, centroids)

plt.rcParams['figure.figsize'] = (8.0, 6.0)
plt.plot(data[cluster_assignment == 0, 0], data[cluster_assignment == 0, 1],
         'ro')
plt.plot(data[cluster_assignment == 1, 0], data[cluster_assignment == 1, 1],
         'b+')
plt.plot(data[cluster_assignment == 2, 0], data[cluster_assignment == 2, 1],
         'k.')
plt.show()
print('#', 50 * "-")
# -----------------------
from scipy.cluster.hierarchy import linkage, dendrogram

file = open("data.dat", "r")
lines = file.readlines()
Example #33
 def mapping(self, descriptors):
     words, distance = vq(descriptors, self.voc)
     return words
Example #34
def createHistogram(descriptor_list, voc, k):
    features = np.zeros(k, "float32")
    words, distance = vq(descriptor_list, voc)
    for w in words:
        features[w] += 1
    return features
Example #35
        goodClusterCountList = []
        for iteration in range(10):
            print "Iteration #%d." % (iteration + 1)
            currentExamples = np.array(
                getRandomSample(SIFTData, 0.5, range(1, 102)))
            # print "CurrentExamples = " + str(currentExamples)
            # We need to extract the class means as well as the example mappings
            # from the "currentExamples" ndarray.
            categoryMeans, exampleHash = extractLabelInfo(currentExamples)
            currentExamples = stripLastColumn(currentExamples)
            # Run K-means

            currentExamples = whiten(currentExamples)  # whiten returns a copy; capture it
            print "Running K-means..."
            codebook, _distortion = kmeans(currentExamples, 101, 10)
            assignments, _distortion = vq(currentExamples, codebook)
            print "Ran K-means"
            if (len(assignments) != currentExamples.shape[0]):
                raise LogicalError, "Method %s: K-means should have computed %d assignments; instead, it computed %d." % (
                    stack()[0][3], SIFTData.shape[0], len(assignments))
            errorRate, goodClusters = evaluateClustering(
                codebook, currentExamples, assignments, categoryMeans,
                exampleHash, 101)
            errorRateList.append(errorRate)
            goodClusterCountList.append(goodClusters)
            print "K-means produced an error rate of %.4f%% in iteration %d while computing %d \"good\" clusters.." % (
                100 * errorRate, iteration + 1, goodClusters)

        print "On average, we had %.4f \"good\" clusters and %.4f%% error rate." % (
            np.mean(goodClusterCountList), np.mean(errorRateList))
        fp = open('output_data/errorRate_SIFTFeatures.txt', 'w')
Example #36
File: bow.py Project: SmallHedgehog/IRS
        images.append(image)
    except:
        pass
    count += 1
descriptors = features[0][1]
for _, descriptor in features:
    descriptors = np.vstack((descriptors, descriptor))

# CLUSTER
codes = 1000
codebook, var = kmeans(obs=descriptors, k_or_guess=codes)

# THE HISTOGRAM OF FEATURES
im_features = np.zeros((len(features), codes))
for idx in range(len(features)):
    words, dis = vq(features[idx][1], code_book=codebook)
    for w in words:
        im_features[idx][w] += 1

# TF-IDF
occur = np.sum((im_features > 0) * 1, axis=0)
idf = np.array(np.log((1.0 * len(im_features) + 1) / (1.0 * occur + 1)))

# L2-NORMALIZATION
im_features = im_features * idf
im_features = preprocessing.normalize(im_features, norm='l2')

infos = {
    'images': images,
    'idf': idf,
    'im_features': im_features,
Example #37
##########################################

im = array(Image.open(imlist[0]))
m, n = im.shape[0:2]
imnbr = len(imlist)

tmpArr = []
for i in range(imnbr):
    tmpArr.append(array(Image.open(imlist[i])).flatten())  # keep every image, not just the last

immatrix = array(tmpArr, 'f')

V, S, immean = pca.pca(immatrix)

immean = immean.flatten()
projected = array([dot(V[:40], immatrix[i] - immean) for i in range(imnbr)])

projected = whiten(projected)
centroids, distortion = kmeans(projected, 2)

code, distance = vq(projected, centroids)

for k in range(2):
    ind = where(code == k)[0]
    figure()
    gray()
    for i in range(minimum(len(ind), 40)):
        subplot(2, 10, i + 1)
        imshow(immatrix[ind[i]].reshape((25, 25)))
        axis('off')
show()
Example #38
    des_list.append((image_path, des))
    sum_image += 1

des_train = des_list[0][1]
for path, temp_des in des_list[1:]:
    des_train = np.vstack((des_train, temp_des))

print("Dang phan cum tat ca cac features.......")
sum_cluster = 1000
voc, distortion = kmeans(des_train, sum_cluster, 1)

print("Tao xong Voc")
img_vs_cluster = np.zeros((sum_image, sum_cluster))

for i in range(sum_image):
    words, dis_vs_clus = vq(des_list[i][1], voc)
    print("do dai words: ", len(words))
    print("Mang phan cum cho cac feature cua image ", des_list[i][0], " la: ",
          words)
    for w in words:
        img_vs_cluster[i][w] += 1

print("\nMang tan suat cua image va cluster  ", img_vs_cluster)

# number of images that contain each cluster
df = np.sum((img_vs_cluster > 0) * 1, axis=0)
print("\ndf: ", df)

idf = np.array(np.log(sum_image / df), 'float32')
print("\nidf: ", idf)
Example #39
    def train(self, data):
        """
        Train the feature with the given data. Bow need a K-Mean clustering which is very long. It must generate a
        vocabulary of visual words.

        :param data: List of images
        :return: Nothing
        """
        # Construct SIFT object
        sift = cv2.xfeatures2d.SIFT_create()
        # SIFT descriptors list
        descriptors = []
        # Loops over all images and compute SIFT descriptors
        for img_path in data:
            gray = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY)
            # Compute SIFT
            kp, des = sift.detectAndCompute(gray, None)
            # Append image SIFT descriptors to main array
            descriptors.append(des)
        # descriptors len = number of images
        # descriptors[x] len = number of feature for images
        # descriptors[x][y] len = 128 -> SIFT descriptor

        # Put data together
        t_descriptors = descriptors[0]  # descriptors[0] is the full array for the first image
        for descriptor in descriptors[1:]:
            if isinstance(descriptor, np.ndarray):
                t_descriptors = np.vstack((t_descriptors, descriptor))

        # t_descriptor len = total number of images features
        # t_descriptor[x] len = 128 -> SIFT descriptor

        parameters = self.__load_default()

        # VOCABULARY GENERATION
        # Stop the iteration when any of the condition is met (accuracy and max number of iterations)
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER,
                    int(parameters[1]), 1.0)
        # K-mean clustering, vocabulary has k words
        ret, label, vocabulary = cv2.kmeans(np.float32(t_descriptors),
                                            int(parameters[0]), None, criteria,
                                            int(parameters[2]),
                                            cv2.KMEANS_RANDOM_CENTERS)

        # Save vocabulary
        np.savetxt(
            os.path.join(self.feature_directory,
                         'default_' + '_'.join(parameters), 'voc.out'),
            vocabulary)

        # Compute histograms
        # Histograms generation array of (images count, number of words)
        histograms = np.zeros((len(data), int(parameters[0])))
        for i in range(len(data)):
            if isinstance(descriptors[i], np.ndarray):
                # Assign codes from a code book to observations.
                words, distance = vq(descriptors[i], vocabulary)
                for w in words:
                    histograms[i][w] += 1

        # Create standard scaler and standardize data
        std_slr = sk.StandardScaler().fit(histograms)
        # Save standard scaler
        pickle.dump(
            std_slr,
            open(
                os.path.join(self.feature_directory,
                             'default_' + '_'.join(parameters), "std.out"),
                "wb"))

        if parameters[3] == 'True':
            histograms = std_slr.transform(histograms)

        # Save histograms and standard scaler
        np.savetxt(
            os.path.join(self.feature_directory,
                         'default_' + '_'.join(parameters), "histogram.out"),
            histograms)
Example #40
with open(ctr_clusters) as f:
    # initialize the CSV reader
    cl_reader = csv.reader(f)

    #read cluster centers from file and stack them into a numpy array
    centers = np.zeros((1, dimension), "float32")
    for row in cl_reader:
        vis_word = [float(x) for x in row]
        centers = np.vstack((centers, vis_word))
        imageCount = imageCount + 1
    centers = np.delete(centers, (0), axis=0)

    #compute histogram for query image
    query_hstm = np.zeros(k, "float32")
    words, distance = vq(dsc, centers)
    for w in words:
        query_hstm[w] += 1
    print(query_hstm)

    f.close()

indexPath = (args["index"])
results = {}
with open(indexPath) as fl:
    reader = csv.reader(fl)
    # loop over the rows in the index
    for row in reader:
        # parse out the image ID and features, then compute the
        # chi-squared distance between the features in our index
        # and our query features
Example #41
for image_path in image_paths:
    im = cv2.imread(image_path)
    #kpts = fea_det.detect(im)
    kpts, des = sift_object.detectAndCompute(im, None)
    des_list.append((image_path, des))   
    
# Stack all the descriptors vertically in a numpy array
descriptors = des_list[0][1]
for image_path, descriptor in des_list[1:]:
    descriptors = np.vstack((descriptors, descriptor))  

# Perform k-means clustering
k = 100
voc, variance = kmeans(descriptors, k, 1) 
idx, _ = vq(descriptors,voc)
arr_of_count_clusters = np.bincount(idx)
print(arr_of_count_clusters)

# Calculate the histogram of features
im_features = np.zeros((len(image_paths), k), "float32")
for i in xrange(len(image_paths)):
    words, distance = vq(des_list[i][1],voc)
    for w in words:
        im_features[i][w] += 1

# Perform Tf-Idf vectorization
nbr_occurences = np.sum( (im_features > 0) * 1, axis = 0)
idf = np.array(np.log((1.0*len(image_paths)+1) / (1.0*nbr_occurences + 1)), 'float32')

# Scaling the words
Example #42
import numpy as np
from scipy.cluster.vq import *
import external_index

obs = np.load("./features_imagenet_vgg16/features_imagenet_vgg16.npy")
obs = whiten(obs)
[codebook, distortion] = kmeans(obs, 10)
[code, dist] = vq(obs, codebook)
for i in range(10):
    print(code[(i * 100):(i + 1) * 100])

reference_model = np.zeros(1000)
for i in range(10):
    reference_model[i * 100:(i + 1) * 100] = np.ones(100) * i

index = external_index.external_index(code, reference_model)
print(index)
Example #43
def patila_vs_lehenga(img):
    # Get the training classes names and store them in a list
    #cascadePath = "haarcascade_frontalface_default.xml"
    train_path = "D:\\tt\\"
    training_names = os.listdir(train_path)

    image_paths = []  # Initialising the list
    image_classes = []  # Initialising the list
    class_id = 0
    for (i, training_name) in enumerate(training_names):
        label = training_name.split(os.path.sep)[-1].split(" ")[0]
        temp = 'b'
        if label == 'lehenga':
            temp = 'lehenga'
        else:
            temp = 'patiyala'
        image_paths.append(training_name)
        image_classes.append(temp)
        class_id += 1

    sift = cv2.xfeatures2d.SIFT_create()
    # List where all the descriptors are stored
    des_list = []
    # Reading the image and calculating the features and corresponding descriptors
    for image_pat in image_paths:
        image_path = train_path + image_pat
        im = cv2.imread(image_path)
        #print (image_path)
        dst = edges_detected(im)
        ret_train, thresh_train = cv2.threshold(dst, 225, 255, 0)
        kpts, des = sift.detectAndCompute(dst, None)
        des_list.append(
            (image_path,
             des))  # Appending all the descriptors into the single list

    # Stack all the descriptors vertically in a numpy array
    descriptors = des_list[0][1]
    for image_path, descriptor in des_list[1:]:
        descriptors = np.vstack(
            (descriptors, descriptor))  # Stacking the descriptors

    # Perform k-means clustering
    k = 50  # Number of clusters
    voc, variance = kmeans(descriptors, k,
                           1)  # Perform Kmeans with default values

    # Calculate the histogram of features
    im_features = np.zeros((len(image_paths), k), "float32")
    for i in range(len(image_paths)):
        words, distance = vq(des_list[i][1], voc)
        for w in words:
            im_features[i][w] += 1

    # Perform Tf-Idf vectorization
    nbr_occurences = np.sum((im_features > 0) * 1, axis=0)
    # Calculating the number of occurrences
    idf = np.array(
        np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)),
        'float32')
    # Giving weight to one that occurs more frequently

    # Scaling the words
    stdSlr = StandardScaler().fit(im_features)
    im_features = stdSlr.transform(
        im_features)  # Scaling the visual words for better Prediction

    # Load the classifier, class names, scaler, number of clusters and vocabulary
    samples = im_features
    responses = np.array(image_classes)
    classes_names = training_names
    voc = voc
    clf = LinearRegression()
    #Use - rawImages and labels for training the model.
    clf.fit(samples, responses)
    #args = vars(parser.parse_args())
    image_paths1 = [img]
    height, width = img.shape[:2]
    img = img[np.int(height / 2):height, 0:width]
    #cv2.imshow('in image',img)
    # List where all the descriptors are stored
    des_list1 = []
    dst = edges_detected(img)
    ret_train, thresh_train = cv2.threshold(dst, 225, 255, 0)
    kpts1, des1 = sift.detectAndCompute(dst, None)
    des_list1.append(
        (image_paths1, des1))  # Appending the descriptors to a single list

    # Stack all the descriptors vertically in a numpy array
    descriptors1 = des_list1[0][1]
    test_features1 = np.zeros((len(image_paths1), k), "float32")
    for i in range(len(image_paths1)):
        words1, distance1 = vq(des_list1[i][1], voc)
        for w in words1:
            test_features1[i][w] += 1  # Calculating the histogram of features

    # Perform Tf-Idf vectorization
    nbr_occurences1 = np.sum(
        (test_features1 > 0) * 1,
        axis=0)  # Getting the number of occurrences of each word
    idf1 = np.array(
        np.log((1.0 * len(image_paths1) + 1) / (1.0 * nbr_occurences1 + 1)),
        'float32')

    op = clf.predict(test_features1)
    #print (op)
    return op
Example #44
i = 1
descriptors = des_list[0][1]
for image_path, descriptor in des_list[1:]:
    descriptors = np.vstack(
        (descriptors, descriptor))  # Stacking the descriptors
    print("Progress: ", i)
    i = i + 1

# Perform k-means clustering
k = 500  # Number of clusters
voc, variance = kmeans(descriptors, k, 1)  # Perform Kmeans with default values

# Calculate the histogram of features
im_features = np.zeros((len(image_paths), k), "float32")
for i in range(len(image_paths)):
    words, distance = vq(des_list[i][1], voc)
    for w in words:
        im_features[i][w] += 1
    print("Extracting features...")

# Perform Tf-Idf vectorization
nbr_occurences = np.sum((im_features > 0) * 1, axis=0)
# Calculating the number of occurrences
idf = np.array(
    np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)),
    'float32')
# Giving weight to one that occurs more frequently

# Scaling the words
stdSlr = StandardScaler().fit(im_features)
im_features = stdSlr.transform(
Example #45
def indexing():
    # Get the training classes names and store them in a list
    train_path = settings.PI_IM_RESOURCES_DIR

    training_names = os.listdir(train_path)

    numWords = 1000

    # Get all the path to the images and save them in a list
    # image_paths and the corresponding label in image_paths
    image_paths = []
    for training_name in training_names:
        image_path = os.path.join(train_path, training_name)
        image_paths += [image_path]

    # Create feature extraction and keypoint detector objects
    # fea_det = cv2.FeatureDetector_create("SIFT")
    # des_ext = cv2.DescriptorExtractor_create("SIFT")

    # List where all the descriptors are stored
    des_list = []
    sift = cv2.xfeatures2d.SIFT_create()

    for i, image_path in enumerate(image_paths):
        im = cv2.imread(image_path)
        print("Extract SIFT of %s image, %d of %d images" % (training_names[i], i, len(image_paths)))
        gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        kp, des = sift.detectAndCompute(gray, None)
        des_list.append((image_path, des))

    # Stack all the descriptors vertically in a numpy array
    # downsampling = 1
    # descriptors = des_list[0][1][::downsampling,:]
    # for image_path, descriptor in des_list[1:]:
    #    descriptors = np.vstack((descriptors, descriptor[::downsampling,:]))

    # Stack all the descriptors vertically in a numpy array
    descriptors = des_list[0][1]
    for image_path, descriptor in des_list[1:]:
        descriptors = np.vstack((descriptors, descriptor))

    # Perform k-means clustering
    print("Start k-means: %d words, %d key points" % (numWords, descriptors.shape[0]))
    voc, variance = kmeans(descriptors, numWords, 1)

    # Calculate the histogram of features
    im_features = np.zeros((len(image_paths), numWords), "float32")
    for i in range(len(image_paths)):
        words, distance = vq(des_list[i][1], voc)
        for w in words:
            im_features[i][w] += 1

    # Perform Tf-Idf vectorization
    nbr_occurences = np.sum((im_features > 0) * 1, axis=0)
    idf = np.array(np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)), 'float32')

    # Perform L2 normalization
    im_features = im_features * idf
    im_features = preprocessing.normalize(im_features, norm='l2')

    joblib.dump((im_features, image_paths, training_names, idf, numWords, voc), "bof_retr.pkl", compress=3)

    return "Indexed: k-means: %d words, %d key points" % (numWords, descriptors.shape[0])
Example #46
query_images = os.listdir(
    '../Data/Oxford-5k/cropped_query_images/') if ifcropped else os.listdir(
        '../Data/Oxford-5k/query_images/')
im_features, image_paths, idf, numWords, voc, nfeatures = joblib.load(
    '../Data/Oxford-5k/BOF/BOF_256features.pkl')
sift = cv2.xfeatures2d.SIFT_create(nfeatures=nfeatures)
aps = []
for query_image in query_images:
    query_name = os.path.splitext(os.path.basename(query_image))[0]
    img = cv2.imread('../Data/Oxford-5k/cropped_query_images/' +
                     query_image) if ifcropped else cv2.imread(
                         '../Data/Oxford-5k/query_images/' + query_image)
    kps, des = sift.detectAndCompute(img, None)
    inputFeature = np.zeros((1, numWords), "float32")
    words, distance = vq(des, voc)
    for w in words:
        inputFeature[0][w] += 1

    # Perform L2 normalization
    inputFeature = inputFeature * idf
    inputFeature = preprocessing.normalize(inputFeature, norm='l2')

    matchList = matchImages(inputFeature, QESize=QESize)

    rankFilePath = '../Data/Oxford-5k/temp.txt'
    rankFile = open(rankFilePath, 'w')
    rankFile.writelines(match + '\n' for match in matchList)
    rankFile.close()

    gt_file = '../Data/Oxford-5k/gt_files/' + query_name
Example #47
def im_features_create(im_features, des_list, voc, image_paths):
    for i in range(len(image_paths)):
        words, distance = vq(des_list[i][1], voc)
        #print len(test_features[i])
        for w in words:
            im_features[i][w] += 1
Example #48
File: KM06_01_01.py Project: wosxcc/bot
import matplotlib.pyplot as plt
import numpy as np
from  scipy.cluster.vq import *
from sklearn.datasets.samples_generator import make_blobs

centers = [[-7, -7], [-8, 7.5], [9.5, -6], [9, 8.5]] # cluster centers
N = 300
# generate a synthetic dataset
#data, features = make_circles(n_samples=200, shuffle=True, noise=0.1, factor=0.4)
data, features = make_blobs(n_samples=N, centers=centers, n_features = 2, cluster_std=0.8, shuffle=False, random_state=42)

print(data.shape)

centroids,variance = kmeans(data,4)
code,distance=vq(data,centroids)

# print('code',code)
#
# print('distance',distance)
# print('variance',variance)

# print(data.transpose()[0])
fig, ax = plt.subplots()
for  i in range(len(code)):
    if code[i]==1:
        ax.scatter(data[i].transpose()[0], data[i].transpose()[1], marker='v', s=30, c='y')
    elif code[i] == 2:
        ax.scatter(data[i].transpose()[0], data[i].transpose()[1], marker='o', s=30, c='r')
    elif code[i] == 3:
        ax.scatter(data[i].transpose()[0], data[i].transpose()[1], marker='s', s=30, c='g')
    else:
Example #49
 fp3 = open('proc_data/sift_data_categoryMeans.pkl', 'rb')
 
 SIFTData = pkl.load(fp1)
 exampleHash = pkl.load(fp2)
 categoryMeans = pkl.load(fp3)
 fp1.close()
 fp2.close()
 fp3.close()
 
 # Run K-means
     
 SIFTData = whiten(SIFTData)  # whiten returns a copy; capture it
 print "Running K-means..."
 codebook, _distortion = kmeans(SIFTData, 101, 100)
 pkl.dump(codebook, open('proc_data/codebook_k-means_SIFT.pkl', 'wb'))
 assignments, _distortion = vq(SIFTData, codebook)
 pkl.dump(assignments, open('proc_data/assignments_k-means_SIFT.pkl', 'wb'))
 print "Ran K-means"
 if(len(assignments) != SIFTData.shape[0]):
     raise LogicalError, "Method %s: K-means should have computed %d assignments; instead, it computed %d." %(stack()[0][3], SIFTData.shape[0], len(assignments))
 errorRate, goodClusters, avgEntropy = evaluateClustering(codebook, SIFTData, assignments, categoryMeans, exampleHash, 101)
 print "K-means produced an error rate of %.4f%%, computed %d \"good\" clusters and introduced an average entropy of %.4f." %(errorRate, goodClusters, avgEntropy)
 fp = open('output_data/errorRate_K-means_SIFTFeatures.txt', 'w')
 fp.write(str(errorRate))
 fp.close()
 fp = open('output_data/accurateClusters_K-means_SIFTFeatures.txt', 'w')
 fp.write(str(goodClusters))
 fp.close()
 fp = open('output_data/averageEntropy_K-means_SIFTFeatures.txt', 'w')
 fp.write(str(avgEntropy))
 fp.close()
Example #50
def colar_detection(img):
    # Get the training classes names and store them in a list
    cascadePath = "haarcascade_frontalface_default.xml"
    train_path = "D:\\t\\"
    training_names = os.listdir(train_path)

    image_paths = []  # Initialising the list
    image_classes = []  # Initialising the list
    class_id = 0
    for (i, training_name) in enumerate(training_names):
        label = training_name.split(os.path.sep)[-1].split("_")[0]
        temp = 'b'
        if label == 'haryana':
            temp = 'Collar'
        else:
            temp = 'Non Collar'
        image_paths.append(training_name)
        image_classes.append(temp)
        class_id += 1

    sift = cv2.xfeatures2d.SIFT_create()
    # List where all the descriptors are stored
    des_list = []
    # Reading the image and calculating the features and corresponding descriptors
    for image_pat in image_paths:
        image_path = train_path + image_pat
        im = cv2.imread(image_path)
        #print (im)
        #======================face detection and ROI==================#
        faceCascade = cv2.CascadeClassifier(cascadePath)
        gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        faces = faceCascade.detectMultiScale(gray,
                                             scaleFactor=1.1,
                                             minNeighbors=5,
                                             minSize=(30, 30),
                                             flags=cv2.CASCADE_SCALE_IMAGE)
        # Draw a rectangle around the faces
        W = 0
        H = 0
        X = 0
        Y = 0
        for (x, y, w, h) in faces:
            W = w
            H = h
            X = x
            Y = y
        if W == 0 and H == 0 and X == 0 and Y == 0:  # '&' binds tighter than '==', so use 'and'
            temp_train = im
        else:
            temp_train = im[Y + H:Y + H + 40, X - 20:X + W + 20]
        temp2_train = cv2.cvtColor(temp_train, cv2.COLOR_BGR2GRAY)
        ret_train, thresh_train = cv2.threshold(temp2_train, 225, 255, 0)
        #======================End face detection and ROI==================#

        # NOTE: SIFT runs on the grayscale crop; thresh_train above is computed but unused
        kpts, des = sift.detectAndCompute(temp2_train, None)
        des_list.append(
            (image_path,
             des))  # Appending all the descriptors into the single list

    # Stack all the descriptors vertically in a numpy array
    descriptors = des_list[0][1]
    for image_path, descriptor in des_list[1:]:
        descriptors = np.vstack(
            (descriptors, descriptor))  # Stacking the descriptors

    # Perform k-means clustering
    k = 50  # Number of clusters
    voc, variance = kmeans(descriptors, k,
                           1)  # a single k-means run (iter=1)

    # Calculate the histogram of features
    im_features = np.zeros((len(image_paths), k), "float32")
    for i in range(len(image_paths)):
        words, distance = vq(des_list[i][1], voc)
        for w in words:
            im_features[i][w] += 1

    # Perform Tf-Idf vectorization
    nbr_occurences = np.sum((im_features > 0) * 1, axis=0)
    # Number of training images in which each visual word occurs
    idf = np.array(
        np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)),
        'float32')
    # Words that occur in fewer images receive a higher weight
    # NOTE: idf is computed but never applied to im_features below

    # Scaling the words
    stdSlr = StandardScaler().fit(im_features)
    im_features = stdSlr.transform(
        im_features)  # Scaling the visual words for better prediction

    # Load the classifier, class names, scaler, number of clusters and vocabulary
    samples = im_features
    responses = np.array(image_classes)
    classes_names = training_names
    clf = KNeighborsClassifier()
    # Use rawImages and labels for training the model.
    clf.fit(samples, responses)
    #args = vars(parser.parse_args())
    image_paths1 = [img]
    #cascadePath = "haarcascade_frontalface_default.xml"
    faceCascade = cv2.CascadeClassifier(cascadePath)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(gray,
                                         scaleFactor=1.1,
                                         minNeighbors=5,
                                         minSize=(30, 30),
                                         flags=cv2.CASCADE_SCALE_IMAGE)
    # Draw a rectangle around the faces
    W = 0
    H = 0
    X = 0
    Y = 0
    for (x, y, w, h) in faces:
        W = w
        H = h
        X = x
        Y = y
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    #cv2.imshow('faces',img)
    ##---------END FACE DETECTION----------##

    ##---------COLLAR DETECTION----------##
    # Guard against the no-face case, mirroring the training branch above
    if W == 0 and H == 0 and X == 0 and Y == 0:
        temp = img
    else:
        temp = img[Y + H:Y + H + 40, X - 20:X + W + 20]
    cv2.imshow('ROI', temp)
    temp2 = cv2.cvtColor(temp, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(temp2, 225, 255, 0)

    # List where all the descriptors are stored
    des_list1 = []
    # NOTE: unlike training (grayscale crop), SIFT here runs on the thresholded image
    kpts1, des1 = sift.detectAndCompute(thresh, None)
    des_list1.append(
        (image_paths1, des1))  # Appending the descriptors to a single list

    # Stack all the descriptors vertically in a numpy array
    descriptors1 = des_list1[0][1]
    test_features1 = np.zeros((len(image_paths1), k), "float32")
    for i in range(len(image_paths1)):
        words1, distance1 = vq(des_list1[i][1], voc)
        for w in words1:
            test_features1[i][w] += 1  # Calculating the histogram of features

    # Perform Tf-Idf vectorization
    nbr_occurences1 = np.sum(
        (test_features1 > 0) * 1,
        axis=0)  # Getting the number of occurrences of each word
    idf1 = np.array(
        np.log((1.0 * len(image_paths1) + 1) / (1.0 * nbr_occurences1 + 1)),
        'float32')
    # NOTE: idf1, like idf above, is computed but never applied

    # Apply the same scaling that was fitted on the training histograms,
    # otherwise the classifier sees features on a different scale
    test_features1 = stdSlr.transform(test_features1)
    op = clf.predict(test_features1)
    return op
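Both histogram blocks above compute idf weights (idf and idf1) that are never multiplied into the features, so the classifier effectively sees raw counts after scaling. A minimal sketch of how the tf-idf reweighting would typically be wired in; apply_tfidf is a hypothetical helper, not part of the snippet:

import numpy as np
from sklearn.preprocessing import normalize

def apply_tfidf(histograms, idf):
    """Reweight bag-of-words histograms by idf, then L2-normalize each row."""
    return normalize(histograms * idf, norm='l2')

# e.g. im_features = apply_tfidf(im_features, idf) before clf.fit(...),
# and  test_features1 = apply_tfidf(test_features1, idf) before clf.predict(...)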
Example #51
0
    histograms = {}

    vqtime = 0
    histtime = 0
    t1 = 0
    vqhisttime = time()
    missedrange = 0

    # make histograms for each image
    for f in foldernames:
        count = len(orbFeatures[f])
        histograms[f] = []
        for i in range(count):

            t1 = time()
            codewords = vq(orbFeatures[f][i], centroids)[0]
            t1 = time() - t1
            vqtime += t1

            t1 = time()
            # get histograms (bag of words)
            hist, bins, patches = plt.hist(codewords, bins=200)
            t1 = time() - t1
            histtime += t1
            hlen = len(hist)

            if hlen < 200:
                missedrange += 1
                # pad with zeros so every histogram has exactly 200 bins
                # (+= on a NumPy array would try to broadcast, not append)
                hist = np.concatenate([hist, np.zeros(200 - hlen)])

            # store histogram for this picture
            histograms[f].append(hist)
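Since only the counts are needed (and timed), plt.hist is a heavyweight way to bin codewords: it also draws patches. np.bincount produces the same 200-bin bag of words with no rendering, and its minlength argument pins the histogram length, so the manual zero-padding above becomes unnecessary. A minimal sketch:

import numpy as np

def codeword_histogram(codewords, n_words=200):
    """Bag-of-words counts over a fixed vocabulary, with no plotting involved."""
    return np.bincount(np.asarray(codewords), minlength=n_words)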
Example #52
0
def trainDataSet():
    try:
        train_path = SVM_TRAIN_PATH
        training_names = os.listdir(train_path)
        image_classes = []
        image_paths = []
        class_id = 0
        loopCounter = 0
        loopCounterForPrint = 0

        logData(" getting names of classes")
        for training_name in training_names:
            dir = os.path.join(train_path, training_name)
            class_path = [os.path.join(dir, f) for f in os.listdir(dir)]
            image_paths += class_path
            image_classes += [class_id] * len(class_path)
            class_id += 1
        logData("creating feature extractions with method " +
                SVM_FEATURE_DETECTOR_EXTRACTOR_TYPE)
        # Create feature extraction and keypoint detector objects
        fea_det = cv.FeatureDetector_create(
            SVM_FEATURE_DETECTOR_EXTRACTOR_TYPE)
        des_ext = cv.DescriptorExtractor_create(
            SVM_FEATURE_DETECTOR_EXTRACTOR_TYPE)
        logData("starting to extract features from each images")
        # List where all the descriptors are stored
        des_list = []
        for image_path in image_paths:
            im = cv.imread(image_path)
            kpts = fea_det.detect(im)
            kpts, des = des_ext.compute(im, kpts)
            des_list.append((image_path, des))
            loopCounter = loopCounter + 1
            if loopCounter % 100 == 0:
                loopCounter = 0
                loopCounterForPrint = loopCounterForPrint + 1
                logData("extracting  features continues with iteration " +
                        str(loopCounterForPrint))
        logData("setting descriptors values from extracted feautures")
        # Stack all the descriptors vertically in a numpy array
        loopCounter = 0
        loopCounterForPrint = 0
        descriptors = des_list[0][1]
        for image_path, descriptor in des_list[1:]:
            descriptors = np.vstack((descriptors, descriptor))
            loopCounter = loopCounter + 1
            if loopCounter % 100 == 0:
                loopCounter = 0
                loopCounterForPrint = loopCounterForPrint + 1
                logData("setting descriptors continues with iteration " +
                        str(loopCounterForPrint))

        # Perform k-means clustering
        logData("calculation kmeans clustring")
        k = 100
        voc, variance = kmeans(descriptors, k, 1)
        # Calculate the histogram of features
        logData("calculating the histogram of features")
        loopCounter = 0
        loopCounterForPrint = 0
        im_features = np.zeros((len(image_paths), k), "float32")
        for i in xrange(len(image_paths)):
            words, distance = vq(des_list[i][1], voc)
            for w in words:
                im_features[i][w] += 1
            loopCounter = loopCounter + 1
            if loopCounter % 100 == 0:
                loopCounter = 0
                loopCounterForPrint = loopCounterForPrint + 1
                logData(
                    "calculating histogram features continues with iteration "
                    + str(loopCounterForPrint))
        logData("performing TF-IDF  vectorization")
        # Perform Tf-Idf vectorization
        nbr_occurences = np.sum((im_features > 0) * 1, axis=0)
        idf = np.array(
            np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)),
            'float32')
        # NOTE: idf is computed but never applied to im_features
        # Scaling the words
        stdSlr = StandardScaler().fit(im_features)
        im_features = stdSlr.transform(im_features)
        # Train the Linear SVM
        logData("training Linear SVM")
        clf = LinearSVC()
        clf.fit(im_features, np.array(image_classes))
        # Save the SVM
        logData("saving values on with names " + SVM_TRAINED_FILE_LOCATION)
        joblib.dump((clf, training_names, stdSlr, k, voc),
                    SVM_TRAINED_FILE_LOCATION,
                    compress=3)
    except Exception as ex:
        logData("Exception on " + str(ex))
Example #53
0
 fp1.close()
     
     ####### Part 2: Run K-means with K = 101 on data and store results on disk. #############
     
     
     # Normalize the features to have variance 1 (k-means requirement).
     # whiten() returns a rescaled copy; it must be assigned back to take effect.
     
     imFeatures = whiten(imFeatures)
     
     # Run k-means 500 times on the data, aiming to produce k = 101 clusters.
     # Each run stops when the decrease in the computed distortion
     # (mean squared error) falls below 1e-05 (the default threshold).
     
     print "Running K-means..."
     codebook, _distortion = kmeans(imFeatures, 101, 500)
     assignments, _distortion = vq(imFeatures, codebook)
     if(len(assignments) != imFeatures.shape[0]):
         raise LogicalError, "Method %s: K-means should have computed %d assignments; instead, it computed %d." %(CURR_FUNC_NAME, imFeatures.shape[0], len(assignments))
     print "Ran K-means"
     accurateClusters = evaluateClustering(codebook, imFeatures, assignments, categoryMeans, exampleHash, 101)
  
     print "We computed %d \"accurate\" clusters, which corresponds to %.3f%% of total true classes." %(accurateClusters, 100*(accurateClusters/101)) 
     
     fp = open('output_data/accurateClustersForGradients.pkl', 'wb')
     pkl.dump(accurateClusters, fp)
     fp.close()
     print "That would be all. Exiting..."
     quit()
     
 except DatasetError as d:
     print "A dataset-related error occurred: " + str(d)
Example #54
0
def testDataSet():
    try:
        svmRateOfSuccessList = {}
        test_path = SVM_TEST_PATH
        # Load the classifier, class names, scaler, number of clusters and vocabulary
        clf, classes_names, stdSlr, k, voc = joblib.load(
            SVM_TRAINED_FILE_LOCATION)
        testing_names = os.listdir(test_path)
        image_paths = []
        numberOfLocalImages = 0
        counterOfLocalImages = 0
        logData("creating feature extractions with method " +
                SVM_FEATURE_DETECTOR_EXTRACTOR_TYPE)
        # Create feature extraction and keypoint detector objects
        fea_det = cv.FeatureDetector_create(
            SVM_FEATURE_DETECTOR_EXTRACTOR_TYPE)
        des_ext = cv.DescriptorExtractor_create(
            SVM_FEATURE_DETECTOR_EXTRACTOR_TYPE)

        logData(" getting names of classes")
        for i, testing_name in enumerate(testing_names):
            try:
                dir = os.path.join(test_path, testing_name)
                class_path = [os.path.join(dir, f) for f in os.listdir(dir)]
                image_paths = class_path  #image_paths+=class_path
                numberOfLocalImages = len(class_path)
                logData("the class name [" + str(testing_names[i]) +
                        "] has [" + str(numberOfLocalImages) + "] images ")
                logData("starting to extract features from each images")
                # List where all the descriptors are stored
                des_list = []
                for image_path in image_paths:
                    im = cv.imread(image_path)
                    kpts = fea_det.detect(im)
                    kpts, des = des_ext.compute(im, kpts)
                    des_list.append((image_path, des))

                # Stack all the descriptors vertically in a numpy array
                logData("setting descriptors values from extracted feautures")

                descriptors = des_list[0][1]
                # slice from index 1: index 0 is already in descriptors
                for image_path, descriptor in des_list[1:]:
                    descriptors = np.vstack((descriptors, descriptor))
                logData("calculating the histogram of features")

                test_features = np.zeros((len(image_paths), k), "float32")
                for i in xrange(len(image_paths)):
                    words, distance = vq(des_list[i][1], voc)
                    for w in words:
                        test_features[i][w] += 1
                logData("performing TF-IDF  vectorization")
                # Perform Tf-Idf vectorization
                nbr_occurences = np.sum((test_features > 0) * 1, axis=0)
                idf = np.array(
                    np.log((1.0 * len(image_paths) + 1) /
                           (1.0 * nbr_occurences + 1)), 'float32')
                # NOTE: idf is computed but never applied to test_features
                # Scale the features
                logData("testing the predictions")
                test_features = stdSlr.transform(test_features)
                # Perform the predictions
                predictions = [
                    classes_names[i] for i in clf.predict(test_features)
                ]
                numberOfOccurrencesOfPrediction = predictions.count(
                    str(testing_names[counterOfLocalImages]))
                rateOfSuccess = (numberOfOccurrencesOfPrediction *
                                 100) / len(predictions)
                logData("the success rate of ["+str(testing_names[counterOfLocalImages]) +"] is : [%"+ str(rateOfSuccess) +\
                    "] of total number of ["+str(len(predictions))+"] data with tp ["+ str(numberOfOccurrencesOfPrediction)+"]")
                logData("")
                svmRateOfSuccessList[
                    testing_names[counterOfLocalImages]] = rateOfSuccess
                #logData("Prediction results "+ str(clf.predict(test_features)))
                #logData("the predictions :"+ str(predictions))
                counterOfLocalImages = counterOfLocalImages + 1
            except Exception as ex:
                logData("Exception on " + str(ex))
                counterOfLocalImages = counterOfLocalImages + 1
        plotResultOfSVM(svmRateOfSuccessList)
    except Exception as ex:
        logData("Exception on " + str(ex))
Example #55
0
 def project(self, descriptors):
     imhist = np.zeros((self.nbr_words))
     words, distance = vq(descriptors, self.voc)  # assign each descriptor to its nearest cluster centre
     for w in words:
         imhist[w] += 1
     return imhist
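For reference, the same mapping written as a self-contained function, with the class's self.voc and self.nbr_words replaced by explicit arguments (a sketch, not the original class):

import numpy as np
from scipy.cluster.vq import vq

def project(descriptors, voc):
    """Map one image's descriptors to a visual-word histogram of len(voc) bins."""
    words, _ = vq(descriptors, voc)   # nearest vocabulary centroid per descriptor
    return np.bincount(words, minlength=len(voc)).astype('float32')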
Example #56
0
kpts, des = detector.detectAndCompute(im, None)
# kpts = fea_det.detect(im)
# kpts, des = des_ext.compute(im, kpts)

# rootsift
#rs = RootSIFT()
#des = rs.compute(kpts, des)

des_list.append((image_path, des))

# Stack all the descriptors vertically in a numpy array
descriptors = des_list[0][1]

#
test_features = np.zeros((1, numWords), "float32")
words, distance = vq(descriptors, voc)
for w in words:
    test_features[0][w] += 1

# Perform Tf-Idf vectorization and L2 normalization
test_features = test_features * idf
test_features = preprocessing.normalize(test_features, norm='l2')

score = np.dot(test_features, im_features.T)
rank_ID = np.argsort(-score)

# Visualize the results
figure()
gray()
subplot(6, 4, 1)
imshow(im[:, :, ::-1])
Example #57
0
rowsum = sum(S, axis=0)
D = diag(1 / sqrt(rowsum))
I = identity(n)
L = I - dot(D, dot(S, D))

# compute eigenvectors of L
U, sigma, V = linalg.svd(L)
k = 2

# create feature vector from k first eigenvectors
# by stacking eigenvectors as columns
features = array(V[:k]).T

# k-means
features = whiten(features)
centroids, distortion = kmeans(features, k)
code, distance = vq(features, centroids)

# plot clusters
for c in range(k):
    ind = where(code == c)[0]
    figure()
    gray()
    # a 5x4 grid holds at most 20 axes, so cap the number of images shown
    for i in range(minimum(len(ind), 20)):
        im = Image.open(imlist[ind[i]])
        subplot(5, 4, i + 1)
        imshow(array(im))
        axis('equal')
        axis('off')
show()
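The same normalized-Laplacian construction on toy data, self-contained for reference. One caveat: np.linalg.svd returns singular values in descending order, so the eigenvectors belonging to the smallest eigenvalues of L (the standard normalized-cuts choice) sit in the last rows of V; this sketch therefore takes V[-2:] rather than the snippet's V[:k]. The kernel width and blob geometry are arbitrary assumptions:

import numpy as np
from scipy.cluster.vq import kmeans, vq

np.random.seed(0)
pts = np.vstack([np.random.randn(10, 2), np.random.randn(10, 2) + 8])
d2 = ((pts[:, None, :] - pts[None, :, :]) ** 2).sum(-1)
S = np.exp(-d2 / 4.0)                        # Gaussian-kernel similarity matrix

D = np.diag(1 / np.sqrt(S.sum(axis=0)))
L = np.identity(len(S)) - D.dot(S).dot(D)    # normalized Laplacian, as above
U, sigma, V = np.linalg.svd(L)
features = V[-2:].T                          # eigenvectors of the two smallest eigenvalues
centroids, _ = kmeans(features, 2)
code, _ = vq(features, centroids)
print(code)                                  # separates the first ten points from the last ten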
Example #58
0
print img_clusters

'''
for new image, predict!
'''

file_test = open('./testimg.pickle','rb')
test_imgs = pickle.load(file_test)
file_nms = open('./test/imagename.pickle','rb')
test_nms = pickle.load(file_nms)
#determine the sift descriptors as belonging to which visual vocabulary
file_no = 0
test = test_imgs
#for test in test_imgs:
try:
    words, distance = vq(np.array(test), vocab)
    X = np.zeros(k, dtype=np.int64)
    for w in words:
        X[w] += 1
    # bag-of-words vector ready, now predict in the LDA visual-topic space
    # (transform expects a 2-D array with one row per document)
    topic_est = model.transform(X.reshape(1, -1))
    print topic_est
    
    tmin = np.min(topic_est)
    tmax = np.max(topic_est)
    thresh = (tmin+tmax)/2
    imp_t = list(map(lambda x:1 if x>thresh else 0, topic_est[0]))
    # predict expects a 2-D array; wrap the single topic vector in one row
    pred_lab = kmeans.predict(np.array(imp_t).reshape(1, -1))
    annot_clust = [0]*17
    for img in img_clusters[pred_lab[0]]:
        wt = 1-hamming(imp_t, imp_top_doc[imagenms.index(img)])
Example #59
0
 def get_words(self, descriptors):
     """ Convert descriptors to words. """
     return vq(descriptors, self.voc)[0]
Example #60
0
for images in imgPath:
    des = f_utils.keypoints(images, True)
    list_des.append((images, des))

# Stack all the descriptors vertically in a numpy array
descrptr = list_des[0][1]
# slice from index 1: index 0 is already in descrptr
for image, d in list_des[1:]:
    descrptr = np.vstack((descrptr, d))

# Initialize with 0.0
fet = np.zeros((len(imgPath), k_val), "float32")

# Iterate through the image path and
# compute the histogram
for i_iter in xrange(len(imgPath)):
    word, dist = vq(list_des[i_iter][1], vocblry)
    for j_iter in word:
        fet[i_iter][j_iter] += 1

# Perform "term frequence-inverse document frequency"
# to determine the importance of a feature
freq = np.sum((fet > 0) * 1, axis=0)
invFreq = np.array(np.log((1.0 * len(imgPath) + 1) / (1.0 * freq + 1)),
                   "float32")

# Perform standardization by centering and scaling
fet = slr.transform(fet)

# Get the prediction data and store
pred = [names[i] for i in clfr.predict(fet)]
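This snippet presupposes that slr, clfr, vocblry, names and k_val come from an earlier training run. Following the joblib convention used in trainDataSet above, restoring them would look roughly like this (the file name is hypothetical):

import joblib

# trainDataSet-style runs dump (clf, training_names, stdSlr, k, voc) in this order
clfr, names, slr, k_val, vocblry = joblib.load("bof_svm_model.pkl")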