import kMeans
from numpy import *


def test1():
    datMat = mat(kMeans.loadDataSet('testSet.txt'))
    print(min(datMat[:, 0]))
    print(kMeans.randCent(datMat, 2))
    print(kMeans.distEclud(datMat[0], datMat[1]))
    # myCentroids, clustAssing = kMeans.kMeans(datMat, 3)
    myCentroids, clustAssing = kMeans.biKmeans(datMat, 4)
    print(myCentroids)
    kMeans.plot1(datMat, myCentroids)
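# plot1 above is a plotting helper from this project's kMeans module and is not
# shown here. A hypothetical sketch of what such a helper presumably does,
# assuming 2-D data and matplotlib (name and styling are illustrative, not the
# module's own):
import matplotlib.pyplot as plt
from numpy import array


def plotClustersSketch(dataMat, centroids):
    # Scatter the data points in blue and overlay the centroids as red crosses.
    data = array(dataMat)
    cents = array(centroids)
    plt.scatter(data[:, 0], data[:, 1], c='b', marker='o', s=20)
    plt.scatter(cents[:, 0], cents[:, 1], c='r', marker='+', s=200)
    plt.show()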
'''
Created on 2016. 2. 9.

@author: TaijinKim
'''
import kMeans
from numpy import *

dataMat = mat(kMeans.loadDataSet('../data/testSet.txt'))
# print(min(dataMat[:, 0]))
# print(min(dataMat[:, 1]))
# print(max(dataMat[:, 1]))
# print(max(dataMat[:, 0]))
# print(kMeans.randCent(dataMat, 2))
#
# print(kMeans.distEclud(dataMat[0], dataMat[1]))
# myCentroids, clustAssing = kMeans.kMeans(dataMat, 4)
#!/usr/bin/env python
__coding__ = "utf-8"
__author__ = "Ng WaiMing"

from kMeans import kMeans
from kMeans import loadDataSet
from kMeans import randCent
from kMeans import distEclud
from kMeans import biKmeans
from numpy import *

if __name__ == '__main__':
    dataMat = mat(loadDataSet('testSet.txt'))
    print('min(dataMat[:, 0])', min(dataMat[:, 0]), '\n')
    print('min(dataMat[:, 1])', min(dataMat[:, 1]), '\n')
    print('max(dataMat[:, 0])', max(dataMat[:, 0]), '\n')
    print('max(dataMat[:, 1])', max(dataMat[:, 1]), '\n')
    print(randCent(dataMat, 2), '\n')
    print(distEclud(dataMat[0], dataMat[1]))
    centroids, clusterAssment = kMeans(dataMat, 4)
    print('centroids:\n', centroids, '\n')
    print('clusterAssment:\n', clusterAssment, '\n')
    dataMat3 = mat(loadDataSet('testSet2.txt'))
    centList, myNewAssments = biKmeans(dataMat3, 3)
    print('centList: \n', centList, '\n')
    # fileName = '../../../../data/k-means/places.txt'
    # imgName = '../../../../data/k-means/Portland.png'
    # kMeans.clusterClubs(fileName=fileName, imgName=imgName, numClust=5)
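# For reference, the loadDataSet / distEclud / randCent helpers exercised by
# these drivers appear to follow the standard "Machine Learning in Action"
# chapter 10 pattern. A minimal sketch under that assumption (tab-separated
# float records; not necessarily the module's exact code):
from numpy import *


def loadDataSet(fileName):
    # Parse a tab-delimited text file into a list of float records.
    dataMat = []
    with open(fileName) as fr:
        for line in fr:
            curLine = line.strip().split('\t')
            dataMat.append(list(map(float, curLine)))
    return dataMat


def distEclud(vecA, vecB):
    # Euclidean distance between two row vectors.
    return sqrt(sum(power(vecA - vecB, 2)))


def randCent(dataSet, k):
    # Draw k random centroids uniformly inside the bounding box of the data.
    n = shape(dataSet)[1]
    centroids = mat(zeros((k, n)))
    for j in range(n):
        minJ = min(dataSet[:, j])
        rangeJ = float(max(dataSet[:, j]) - minJ)
        centroids[:, j] = minJ + rangeJ * random.rand(k, 1)
    return centroids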
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import kMeans
from numpy import *
import matplotlib.pyplot as plt

dataMat = mat(kMeans.loadDataSet('testSet.txt'))
kMeansRandCenter = kMeans.randCent(dataMat, 2)  # two random centroids
print(kMeansRandCenter)
centroids, clusterAssment = kMeans.kMeans(dataMat, 5)

fig = plt.figure(1)
plt.plot(centroids[:, 0], centroids[:, 1], 'ro')
plt.plot(dataMat[:, 0], dataMat[:, 1], 'bo')
plt.axis([-8, 8, -8, 8])
# plt.show()

kMeans.binaryKeans(dataMat, 3)
dataMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
centList, Assments = kMeans.binaryKeans(dataMat3, 3)
print("centList:", centList)
print("Assments:", Assments)

fig = plt.figure(2)
plt.plot(dataMat3[:, 0], dataMat3[:, 1], 'bo')
plt.plot(centList[:, 0], centList[:, 1], 'ro')
plt.axis([-10, 10, -10, 10])
# plt.show()
# For each centroid:
#   compute the distance between the centroid and every data point,
#   assign each data point to the cluster of its nearest centroid,
#   then, for every cluster, recompute the centroid as the mean of its points.
import kMeans
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
import time

st = time.time()
k = 3
dataMat = mat(kMeans.loadDataSet('testSet2.txt'))
oldClassLabel = zeros(len(dataMat), int)
newClassLabel = ones(len(dataMat), int)
center = kMeans.randCent(dataMat, k)
dist = []
m = 0
while newClassLabel.tolist() != oldClassLabel.tolist():  # keep partitioning while the new labels still differ from the old ones
    m += 1  # iteration counter
    for di in range(len(dataMat)):  # for every data point
        dist = []
        for ci in range(len(center)):  # for every centroid, compute its distance to this point
            dist.append(kMeans.distEclud(dataMat[di], center[ci]))  # dist holds this point's distance to every centroid
        distsort = array(dist).argsort()  # sort the distances, returning indices from smallest to largest
        oldClassLabel = newClassLabel.copy()  # note: copy() is required; plain assignment would make both arrays change together
        newClassLabel[di] = distsort[0]  # the first index is the nearest centroid
    for j in range(k):  # recompute each centroid as the mean of the points assigned to cluster j
        x = mean(array(dataMat)[kMeans.find_all_index(newClassLabel, j), 0])
        y = mean(array(dataMat)[kMeans.find_all_index(newClassLabel, j), 1])
        center[j, :] = mat([x, y])
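# find_all_index is a project-specific helper that is not shown here.
# Presumably it returns the positions in `labels` whose value equals `value`;
# a minimal sketch under that assumption:
def find_all_index(labels, value):
    # Collect every index whose label matches the requested cluster id.
    return [i for i, label in enumerate(labels) if label == value]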
import kMeans
from numpy import *

datMat = mat(kMeans.loadDataSet('testSet.txt'))
print(min(datMat[:, 0]))
print(min(datMat[:, 1]))
print(max(datMat[:, 0]))
print(max(datMat[:, 1]))
print(kMeans.randCent(datMat, 2))
print(kMeans.distEclud(datMat[0], datMat[1]))

myCentroids, clustAssing = kMeans.kMeans(datMat, 4)
# print(myCentroids, clustAssing)

datMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
print(centList)
import kMeans
from numpy import *

dataMat = mat(kMeans.loadDataSet('testSet.txt'))
# print dataMat
randMat = kMeans.randCent(dataMat, 2)
# print dataMat[:, 0]
# print randMat
res = kMeans.kMeans(dataMat, 4)
# print res

dataMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
kMeans.biKmeans(dataMat3, 3)  # centList, myNewAssments =
import kMeans
from numpy import *

# Load the tab-delimited data file.
datMat = mat(kMeans.loadDataSet('data2.txt'))
# Columns 2 to 4 of datMat hold the semi-major axis, eccentricity and orbital inclination.
datMat[0, 2:5]
# Compute the distance (delta-v metric).
delta_v = kMeans.distdeltaV(datMat[0, 2:5], datMat[1, 2:5])
# Randomly generate k centroids.
centroids = kMeans.randCent(datMat[:, 2:5], 4)
# k-means clustering.
myCentroids, clustAssing = kMeans.kMeans(datMat[:, 2:5], 5, kMeans.distdeltaV)
# Bisecting k-means clustering.
centList, myNewAssments = kMeans.biKmeans(datMat[:, 2:5], 5, kMeans.distdeltaV)
# Plot the result.
kMeans.showCluster_SRQ(datMat[:, 2:5], myNewAssments)
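# Note that kMeans and biKmeans are called here with kMeans.distdeltaV as the
# third argument, i.e. the distance measure is pluggable. distdeltaV itself is
# not shown; as a purely hypothetical placeholder, any callable with the same
# shape works, e.g. a weighted Euclidean distance over the three orbital-element
# columns (the weights below are made up for illustration, not the real metric):
from numpy import array, sqrt


def distWeightedExample(vecA, vecB, weights=(1.0, 1.0, 1.0)):
    # Weighted Euclidean distance between two 1x3 row vectors (illustrative only).
    diff = array(vecA - vecB).flatten()
    return float(sqrt((array(weights) * diff ** 2).sum()))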
"""
Created on Sun May 14 11:57:07 2017

@author: 凯风
"""
import kMeans
import numpy as np
from imp import reload

reload(kMeans)
datMat = np.mat(kMeans.loadDataSet('testSet.txt'))
min(datMat[:, 0])
max(datMat[:, 0])
min(datMat[:, 1])
max(datMat[:, 1])
kMeans.randCent(datMat, 2)  # check that the initial centroids fall inside the data range
kMeans.distEclud(datMat[0], datMat[1])

# Run k-means on real data
reload(kMeans)
datMat = np.mat(kMeans.loadDataSet('testSet.txt'))
myCentroids, clustAssing = kMeans.kMeans(datMat, 4)  # not necessarily the global optimum

# Bisecting k-means
reload(kMeans)
datMat3 = np.mat(kMeans.loadDataSet('testSet2.txt'))
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)  # still only a local optimum, not a guaranteed global one
centList
myNewAssments
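# biKmeans above is the bisecting variant: start with a single cluster and keep
# splitting whichever cluster gives the largest reduction in total SSE when
# 2-means is run on it. A condensed sketch under that assumption (distMeas is
# any distance callable, kMeansFn any callable(points, 2) returning centroids
# and assignments; not necessarily the module's exact code):
from numpy import mat, zeros, shape, nonzero, mean, inf, sum


def biKmeansSketch(dataSet, k, distMeas, kMeansFn):
    m = shape(dataSet)[0]
    clusterAssment = mat(zeros((m, 2)))            # per point: [cluster index, squared distance]
    centroid0 = mean(dataSet, axis=0).tolist()[0]  # start with one centroid: the overall mean
    centList = [centroid0]
    for j in range(m):
        clusterAssment[j, 1] = distMeas(mat(centroid0), dataSet[j, :]) ** 2
    while len(centList) < k:
        lowestSSE = inf
        for i in range(len(centList)):             # trial-split every existing cluster with 2-means
            ptsInCurrCluster = dataSet[nonzero(clusterAssment[:, 0].A == i)[0], :]
            centroidMat, splitClustAss = kMeansFn(ptsInCurrCluster, 2)
            sseSplit = sum(splitClustAss[:, 1])
            sseNotSplit = sum(clusterAssment[nonzero(clusterAssment[:, 0].A != i)[0], 1])
            if sseSplit + sseNotSplit < lowestSSE:  # keep the split that lowers total SSE the most
                bestCentToSplit, bestNewCents = i, centroidMat
                bestClustAss = splitClustAss.copy()
                lowestSSE = sseSplit + sseNotSplit
        # Relabel the split cluster: sub-cluster 1 gets a new index, sub-cluster 0 keeps the old one.
        bestClustAss[nonzero(bestClustAss[:, 0].A == 1)[0], 0] = len(centList)
        bestClustAss[nonzero(bestClustAss[:, 0].A == 0)[0], 0] = bestCentToSplit
        centList[bestCentToSplit] = bestNewCents[0, :].tolist()[0]
        centList.append(bestNewCents[1, :].tolist()[0])
        clusterAssment[nonzero(clusterAssment[:, 0].A == bestCentToSplit)[0], :] = bestClustAss
    return mat(centList), clusterAssment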
import kMeans
from numpy import *

datMat = mat(kMeans.loadDataSet('testSet.txt'))
print(min(datMat[:, 0]))
print(min(datMat[:, 1]))
print(max(datMat[:, 0]))
print(max(datMat[:, 1]))
print(kMeans.randCent(datMat, 2))
print(kMeans.distEclud(datMat[0], datMat[1]))

myCentroids, clustAssing = kMeans.kMeans(datMat, 4)
# print(myCentroids, clustAssing)

datMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
print(centList)
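# All of these drivers call kMeans.kMeans(dataSet, k). A minimal sketch of the
# standard Lloyd-style loop behind such a function (distMeas and createCent are
# passed explicitly here; the real module presumably defaults them to Euclidean
# distance and random centroids; this is an illustrative version, not the
# module's exact code):
from numpy import mat, zeros, shape, nonzero, mean, inf


def kMeansSketch(dataSet, k, distMeas, createCent):
    m = shape(dataSet)[0]
    clusterAssment = mat(zeros((m, 2)))   # per point: [cluster index, squared distance]
    centroids = createCent(dataSet, k)
    clusterChanged = True
    while clusterChanged:                 # iterate until no point changes cluster
        clusterChanged = False
        for i in range(m):                # assign every point to its nearest centroid
            minDist, minIndex = inf, -1
            for j in range(k):
                distJI = distMeas(centroids[j, :], dataSet[i, :])
                if distJI < minDist:
                    minDist, minIndex = distJI, j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            clusterAssment[i, :] = minIndex, minDist ** 2
        for cent in range(k):             # recompute each centroid as the mean of its assigned points
            ptsInClust = dataSet[nonzero(clusterAssment[:, 0].A == cent)[0]]
            if shape(ptsInClust)[0] > 0:  # skip the update if a cluster lost all of its points
                centroids[cent, :] = mean(ptsInClust, axis=0)
    return centroids, clusterAssment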
import kMeans
import os
import sys
from numpy import *

project_path = os.path.abspath(os.path.dirname(__file__))
text_path = os.path.join(project_path, "../chapter10/testSet.txt")
datMat = mat(kMeans.loadDataSet(text_path))
print(min(datMat[:, 0]))
print(min(datMat[:, 1]))
print(max(datMat[:, 1]))
print(max(datMat[:, 0]))
print(kMeans.randCent(datMat, 2))
print(kMeans.distEclud(datMat[0], datMat[1]))
from time import sleep
import urllib
import json
import os
import kMeans
from numpy import *

# homedir = os.getcwd() + '/machinelearninginaction/ch10/'  # absolute path
homedir = ''  # relative path

# 10.1 The k-means clustering algorithm
datMat = mat(kMeans.loadDataSet(homedir + 'testSet.txt'))
myCentroids, clustAssing = kMeans.kMeans(datMat, 4)
print("datMat:", datMat)
print("min(datMat[:,0]):", min(datMat[:, 0]))
print("min(datMat[:,1]):", min(datMat[:, 1]))
print("max(datMat[:,0]):", max(datMat[:, 0]))
print("max(datMat[:,1]):", max(datMat[:, 1]))
print("randCent(datMat,2):", kMeans.randCent(datMat, 2))
print("distEclud(datMat[0], datMat[1]):", kMeans.distEclud(datMat[0], datMat[1]))
print("myCentroids:", myCentroids)
print("clustAssing:", clustAssing)

# 10.3 The bisecting k-means algorithm
datMat3 = mat(kMeans.loadDataSet(homedir + 'testSet2.txt'))
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
print("datMat3:", datMat3)
print("centList:", centList)
print("myNewAssments:", myNewAssments)

# 10.4.1 The Yahoo! PlaceFinder API
import kMeans
from numpy import *
from importlib import reload

# Build the data matrix
datMat = mat(kMeans.loadDataSet('testSet.txt'))
min(datMat[:, 0])
min(datMat[:, 1])
max(datMat[:, 1])
max(datMat[:, 0])

# Support functions
kMeans.randCent(datMat, 2)
kMeans.distEclud(datMat[0], datMat[1])

# Clustering
reload(kMeans)
datMat = mat(kMeans.loadDataSet('testSet.txt'))
myCentroids, clustAssing = kMeans.kMeans(datMat, 4)

# Bisecting k-means
reload(kMeans)
datMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
centList

# Example: geocoding with geoGrab
reload(kMeans)
geoResults = kMeans.geoGrab('1 VA Center', 'Augusta, ME')
geoResults['ResultSet']['Error']
import kMeans
import numpy as np


def test1():
    dataMat = np.mat(kMeans.loadDataSet('testSet.txt'))
    print(kMeans.randCent(dataMat, 2))
    print(kMeans.distEclud(dataMat[0], dataMat[1]))