def main():
    """CLI entry point: fit a from-scratch hierarchical clustering model on iris.

    Draws two scatter plots over the training split: one colored by the
    ground-truth labels, one by the predicted cluster assignments, so the
    clustering quality can be judged visually.
    """
    cli = argparse.ArgumentParser(description="层次聚类算法Scratch代码命令行参数")
    cli.add_argument("--k", type=int, default=3, help="聚类中心")
    opts = cli.parse_args()

    features, labels = load_iris(return_X_y=True)
    train_x, _, train_y, _ = train_test_split(
        features, labels, train_size=0.8, shuffle=True)

    clusterer = HierarchicalClusterScratch(opts.k)
    clusterer.fit(train_x)
    assignments = clusterer.predict(train_x)

    # Scatter plot colored by true labels, then by cluster assignments.
    plot_scatter(train_x, train_y)
    plot_scatter(train_x, assignments)
def main():
    """CLI entry point: fit a from-scratch k-means model on iris.

    Draws two scatter plots over the training split: one colored by the
    ground-truth labels, one by the predicted cluster assignments.
    """
    cli = argparse.ArgumentParser(description="kmeans算法Scratch代码命令行参数")
    cli.add_argument("--k", type=int, default=3, help="聚类中心")
    cli.add_argument("--max_iter", type=int, default=1000, help="最大迭代次数")
    opts = cli.parse_args()

    features, labels = load_iris(return_X_y=True)
    train_x, _, train_y, _ = train_test_split(
        features, labels, train_size=0.8, shuffle=True)

    clusterer = KMeansScratch(opts.k, opts.max_iter)
    clusterer.fit(train_x)
    assignments = clusterer.predict(train_x)

    # Scatter plot colored by true labels, then by cluster assignments.
    plot_scatter(train_x, train_y)
    plot_scatter(train_x, assignments)
def main():
    """CLI entry point: fit a from-scratch Gaussian mixture model on iris.

    Draws two scatter plots over the training split: one colored by the
    ground-truth labels, one by the predicted component assignments.
    """
    cli = argparse.ArgumentParser(description="高斯混合模型算法Scratch代码命令行参数")
    cli.add_argument("--k", type=int, default=3, help="聚类中心或高斯模型个数")
    cli.add_argument("--max_iter", type=int, default=1000, help="最大迭代次数")
    cli.add_argument("--tolerance", type=float, default=1e-6, help="模型收敛阈值")
    opts = cli.parse_args()

    features, labels = load_iris(return_X_y=True)
    train_x, _, train_y, _ = train_test_split(
        features, labels, train_size=0.8, shuffle=True)

    mixture = GMMScratch(opts.k, opts.max_iter, opts.tolerance)
    mixture.fit(train_x)
    assignments = mixture.predict(train_x)

    # Scatter plot colored by true labels, then by component assignments.
    plot_scatter(train_x, train_y)
    plot_scatter(train_x, assignments)
import sys
import numpy as np
import math
from util import plot_scatter

# Fixed 2-D anchor points (doubled in scale) that trace out a "B" shape;
# each one serves as a component mean for gaussian_mixture_B below.
data_points_B = np.asarray([[-0.5, 1], [0, 1], [0.25, 0.75], [-0.5, 0.5],
                            [0.5, 0.5], [-0.5, 0], [0, 0], [0.25, -0.25],
                            [-0.5, -0.5], [0.5, -0.5], [0.25, -0.75],
                            [-0.5, -1], [0, -1], [0.25, 0.25],
                            [-0.25, 0]]) * 2


def gaussian_mixture_B(batchsize, std=0.1):
    """Sample ``batchsize`` 2-D float32 points around the "B"-shaped anchors.

    Each sample picks one anchor uniformly at random and perturbs it with
    Gaussian noise.

    NOTE(review): ``np.random.normal``'s scale argument is a standard
    deviation, yet ``std**2`` is passed — the effective spread is the square
    of the parameter's name. Confirm this squaring is intentional.
    """
    picks = np.random.choice(range(len(data_points_B)), batchsize)
    centers = data_points_B[picks]
    noisy = np.random.normal(centers, std**2, (batchsize, 2))
    return noisy.astype(np.float32)


def gaussian_mixture(batchsize, num_cluster=8, scale=2, std=0.2):
    """Sample 2-D float32 points from ``num_cluster`` Gaussians evenly spaced
    on a circle of radius ``scale``.

    NOTE(review): as above, ``std**2`` (not ``std``) is used as the noise
    scale — verify this is deliberate.
    """
    which = np.random.randint(0, num_cluster, size=batchsize)
    slice_angle = math.pi * 2 / num_cluster
    theta = which * slice_angle - math.pi / 2
    centers = np.zeros((batchsize, 2), dtype=np.float32)
    centers[:, 0] = np.cos(theta) * scale
    centers[:, 1] = np.sin(theta) * scale
    return np.random.normal(centers, std**2, (batchsize, 2)).astype(np.float32)


if __name__ == '__main__':
    plot_scatter(gaussian_mixture(1000), dir='.', color='red')
# GAN training driver (TensorFlow 1.x session API).
# Per epoch: first plots a large sample from the current generator
# distribution p_g, then runs discriminator updates on mini-batches.
# NOTE(review): this chunk appears truncated — the generator update that the
# trailing "# train generator" comment announces is not visible here.
print('-- begin training --')
with tf.Session() as sess:
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    sess.run(init)
    for epoch in range(epoch_num):
        print('** epoch {} begin **'.format(epoch))
        # Running sums of generator/discriminator objectives for this epoch.
        g_obj = 0.0
        d_obj = 0.0
        # plot p_g: visualize 10k generator samples before this epoch's updates.
        batch_z = np.random.normal(0, 1, [10000, z_dim]).astype(np.float32)
        tmp = model.generate(sess, batch_z)
        plot_scatter(tmp, 'result', epoch, None)
        for step in range(num_one_epoch):
            # draw from p_z: latent noise batch for the generator.
            batch_z = np.random.normal(0, 1, [batch_size, z_dim]).astype(
                np.float32)
            # draw from p_data: real samples come from the "B"-shaped mixture
            # (the circular-mixture alternative is kept commented out).
            #batch_inputs = gaussian_mixture(batch_size)
            batch_inputs = gaussian_mixture_B(batch_size)
            # train discriminator
            d_obj += model.training_disc(sess, batch_z, batch_inputs)
            # train generator
# GAN training loop body (TensorFlow 1.x). ``sess``, ``saver``, ``model`` and
# the hyperparameters (epoch_num, num_one_epoch, batch_size, z_dim) are
# defined outside this fragment.
init = tf.global_variables_initializer()
sess.run(init)
for epoch in range(epoch_num):
    print('** epoch {} begin **'.format(epoch))
    # Running sums of the two objectives, averaged in the epoch summary below.
    g_obj = 0.0
    d_obj = 0.0
    for step in range(num_one_epoch):
        # draw from p_z: latent noise for both D and G updates this step.
        batch_z = np.random.normal(0, 1, [batch_size, z_dim]).astype(np.float32)
        # draw from p_data: real samples from the circular Gaussian mixture.
        batch_inputs = gaussian_mixture(batch_size)
        # train discriminator
        d_obj += model.training_disc(sess, batch_z, batch_inputs)
        # train generator (reuses the same noise batch as the D update).
        g_obj += model.training_gen(sess, batch_z)
    print('epoch:{}, d_obj = {}, g_obj = {}'.format(
        epoch, d_obj/num_one_epoch, g_obj/num_one_epoch))
    # plot p_g: visualize 10k generator samples after this epoch's updates.
    batch_z = np.random.normal(0, 1, [10000, z_dim]).astype(np.float32)
    tmp = model.generate(sess, batch_z)
    plot_scatter(tmp, 'result', epoch)
# NOTE(review): the original formatting is collapsed, so whether this save
# runs once after training (as placed here) or once per epoch is ambiguous —
# confirm against the original file.
saver.save(sess, './model.dump')
# GAN training driver (TensorFlow 1.x) with a 2:1 discriminator/generator
# update schedule. Plots pooled generator samples at the start of each epoch.
# NOTE(review): this chunk appears truncated — it ends mid-step, right after
# drawing the generator's noise batch.
print('-- begin training --')
num_one_epoch = 50
with tf.Session() as sess:
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    sess.run(init)
    for epoch in range(epoch_num):
        # Pool 50 generator batches into one big cloud for plotting p_g.
        plotting = []
        for i in range(50):
            batch_z = np.random.normal(0, 1, [batch_size, z_dim]).astype(
                np.float32)
            tmp = model.generate(sess, batch_z)
            plotting.extend([list(_) for _ in tmp])
        plot_scatter(np.asarray(plotting), 'result', epoch)
        print('** epoch {} begin **'.format(epoch))
        # Objective accumulators (not visibly updated within this fragment).
        g_obj = 0.0
        d_obj = 0.0
        for step in range(num_one_epoch):
            # Two discriminator updates per step, real data drawn from the
            # "B"-shaped mixture (circular mixture kept commented out).
            for i in range(2):
                batch_z = np.random.normal(
                    0, 1, [batch_size, z_dim]).astype(np.float32)
                #batch_inputs = gaussian_mixture(batch_size)
                batch_inputs = gaussian_mixture_B(batch_size)
                model.training_disc(sess, batch_z, batch_inputs)
            # Fresh noise for the (truncated) generator update that follows.
            batch_z = np.random.normal(0, 1, [batch_size, z_dim]).astype(
                np.float32)
            #batch_inputs = gaussian_mixture(batch_size)