# Classify MNIST test digits with k-nearest neighbours.
#
# When args.index is None every test image is classified and one result row
# is printed per image; otherwise only the test image at that index is
# classified.
args = parse_command_line_arguments()
datadir = args.datadir
k = args.k
idx = args.index

# Build each dataset path once instead of repeating the format strings in
# both branches.
train_images_path = "%s/train-images-idx3-ubyte" % datadir
train_labels_path = "%s/train-labels-idx1-ubyte" % datadir
test_images_path = "%s/t10k-images-idx3-ubyte" % datadir
test_labels_path = "%s/t10k-labels-idx1-ubyte" % datadir

if idx is None:
    # Full run: both readers are opened with preload=True.
    # NOTE(review): presumably preload reads the whole file into memory up
    # front, which would pay off because the training set is rescanned for
    # every test image -- confirm against MNISTReader.
    train = MNISTReader(train_images_path, train_labels_path, preload=True)
    test = MNISTReader(test_images_path, test_labels_path, preload=True)

    # Output row: <test index> <known label> <predicted label> <marker>
    # where the marker is "XXX" for a misclassification and empty otherwise.
    for i, (p, known_label) in enumerate(test.images(as_array=True)):
        label, nearest = knn(p, train.images(as_array=True), k)
        x = "" if label == known_label else "XXX"
        # The parenthesised single-argument form prints the same text under
        # the Python 2 print statement and the Python 3 print function.
        print("%i %i %i %s" % (i, known_label, label, x))
        # Flush per row so each result is emitted as soon as it is computed.
        sys.stdout.flush()
else:
    # Single-image run: preload is deliberately not passed, so the readers
    # use whatever default MNISTReader defines.
    train = MNISTReader(train_images_path, train_labels_path)
    test = MNISTReader(test_images_path, test_labels_path)

    img = test.get_image(idx)
    label, nearest = knn(img.as_array(), train.images(as_array=True), k)
# Open the MNIST readers and run k-nearest-neighbour classification.
#
# Relies on `datadir`, `k` and `idx` having been bound by earlier code.
# When idx is None every test image is classified and one result row is
# printed per image; otherwise only the test image at that index is
# classified.

# Build each dataset path once instead of repeating the format strings in
# both branches.
train_images_path = "%s/train-images-idx3-ubyte" % datadir
train_labels_path = "%s/train-labels-idx1-ubyte" % datadir
test_images_path = "%s/t10k-images-idx3-ubyte" % datadir
test_labels_path = "%s/t10k-labels-idx1-ubyte" % datadir

if idx is None:
    # Full run: both readers are opened with preload=True.
    # NOTE(review): presumably preload reads the whole file into memory up
    # front -- confirm against MNISTReader.
    train = MNISTReader(train_images_path, train_labels_path, preload=True)
    test = MNISTReader(test_images_path, test_labels_path, preload=True)

    # Output row: <test index> <known label> <predicted label> <marker>
    # where the marker is "XXX" for a misclassification and empty otherwise.
    for i, (p, known_label) in enumerate(test.images(as_array=True)):
        label, nearest = knn(p, train.images(as_array=True), k)
        x = "" if label == known_label else "XXX"
        # The parenthesised single-argument form prints the same text under
        # the Python 2 print statement and the Python 3 print function.
        print("%i %i %i %s" % (i, known_label, label, x))
        # Flush per row so each result is emitted as soon as it is computed.
        sys.stdout.flush()
else:
    # Single-image run: preload is deliberately not passed, so the readers
    # use whatever default MNISTReader defines.
    train = MNISTReader(train_images_path, train_labels_path)
    test = MNISTReader(test_images_path, test_labels_path)

    img = test.get_image(idx)
    label, nearest = knn(img.as_array(), train.images(as_array=True), k)
help="Converge once the centroids move less than this threshold.") args = parser.parse_args() return args args = parse_command_line_arguments() k = args.k cutoff = args.cutoff print "Loading data..." test = MNISTReader("%s/t10k-images-idx3-ubyte" % args.datadir, "%s/t10k-labels-idx1-ubyte" % args.datadir) points = None for img in test.images(): if points is None: points = numpy.array([img.imgdata]) else: points = numpy.append(points, [img.imgdata], axis=0) km = KMeans(points, k) centroids = km.select_random_centroids() iteration=1 while True: print "Iteration #%i" % iteration new_centroids, point_assignment = km.get_k_means(centroids)