def _loadCentroids(centPath):
    """Read the file at *centPath* and return its contents parsed as JSON."""
    # `with` guarantees the handle is released even if read() raises.
    with open(centPath) as fileIn:
        return json.loads(fileIn.read())


def main(tolerance=0.001):
    """Drive the k-means MapReduce jobs until the centroids stop moving.

    Runs ``MRkMeansInit`` once over ``input.txt`` and then runs
    ``MRkMeansIter`` repeatedly.  After every iteration the centroids are
    re-read from ``intermediateResults.txt`` and the summed ``dist`` between
    each new centroid and the old centroid at the same index is printed.
    Iteration stops once that sum is no longer greater than *tolerance*.

    NOTE(review): the jobs are presumably what (re)write
    ``intermediateResults.txt``; that happens outside this function --
    confirm against the job classes.

    Args:
        tolerance: Largest total centroid movement that still counts as
            converged.  Defaults to 0.001, the previously hard-coded value.

    Raises:
        IndexError: If an iteration yields more centroids than the
            previous one did.
    """
    # First run the initializer to get starting centroids.
    filePath = os.path.join(PROJECT_ROOT, 'input.txt')
    mrJob = MRkMeansInit(args=[filePath])
    with mrJob.make_runner() as runner:
        runner.run()

    # Pull out the centroid values to compare with values after one iteration.
    centPath = os.path.join(PROJECT_ROOT, 'intermediateResults.txt')
    newCentroids = _loadCentroids(centPath)

    # Iterate on the change in centroids; at least one iteration always runs.
    while True:
        oldCentroids = newCentroids

        # Run one iteration.
        mrJob2 = MRkMeansIter(args=[filePath])
        with mrJob2.make_runner() as runner:
            runner.run()

        # Compare new centroids to old ones.
        newCentroids = _loadCentroids(centPath)

        # Plain += accumulation keeps the floating-point summation order
        # fixed; indexing oldCentroids keeps a length mismatch loud.
        delta = 0.0
        for i, center in enumerate(newCentroids):
            delta += dist(center, oldCentroids[i])

        # Single-argument parenthesised print behaves the same on
        # Python 2 and Python 3.
        print("delta={0}, centers={1}".format(delta, str(newCentroids)))

        if not delta > tolerance:
            break