def eval_model(args):
    """Load the trained gate-expert model and visualize its training-set error.

    Shows a 3-D scatter of one predicted output component over the first two
    input dimensions, plus a histogram of the per-sample mean l1 loss against
    the ground-truth outputs.

    :param args: parsed CLI arguments; uses args.warm and args.argmax.
    """
    cfg, lbl = util.get_label_cfg_by_args(args)
    uid = cfg['uniqueid']
    print('We are playing with %s' % uid)
    # Resolve the model file name from the loss variant / warm-start flags.
    outname = 'gate_expert_kldiv_model.pt' if KLLOSS else 'gate_expert_model.pt'
    if args.warm:
        outname = outname.replace('.pt', '_warm.pt')
    mdl_path = os.path.join('models/%s/gate_expert' % uid, outname)
    gate_expert = GateExpertNet(mdl_path, args.argmax)
    raw = npload(cfg['file_path'], uid)
    xs = raw[cfg['x_name']]
    ys = raw[cfg['y_name']]
    pred = gate_expert.get_y(xs)
    # Inspect the distribution of one output component (column 48).
    print(np.histogram(pred[:, 48]))
    fig, ax = pld.get3dAxis()
    ax.scatter(xs[:, 0], xs[:, 1], pred[:, 48])
    # Histogram of the mean per-sample prediction error.
    sample_err = np.mean(l1loss(pred, ys), axis=1)
    fig, ax = plt.subplots()
    ax.hist(sample_err)
    plt.show()
def eval_final_label(args):
    """Assign an expert label to every training sample using the gate network.

    Runs the trained gate-expert model on the training inputs, takes the
    argmax of the gate probabilities as the cluster label, optionally draws
    the labeled samples, and saves the label array under data/pen/.

    :param args: parsed CLI arguments; uses args.warm and args.draw.
    """
    cfg, lbl = util.get_label_cfg_by_args(args)
    uid = cfg['uniqueid']
    print('We are playing with %s' % uid)
    # Model file name depends on the loss variant / warm-start flags.
    outname = 'gate_expert_kldiv_model.pt' if KLLOSS else 'gate_expert_model.pt'
    if args.warm:
        outname = outname.replace('.pt', '_warm.pt')
    mdl_path = os.path.join('models/%s/gate_expert' % uid, outname)
    # argmax=False so the gate returns soft probabilities.
    gate_expert = GateExpertNet(mdl_path, False)
    raw = npload(cfg['file_path'], uid)
    xs = raw[cfg['x_name']]
    p, v = gate_expert.get_p_y(xs)
    label = np.argmax(p, axis=1)
    if args.draw:
        # One scatter color per expert cluster.
        fig, ax = plt.subplots()
        for cluster in range(np.amax(label) + 1):
            picked = label == cluster
            ax.scatter(xs[picked, 0], xs[picked, 1])
        plt.show()
    # Output path mirrors the model-name conventions above.
    label_name = 'data/pen/gate_expert_label.npy'
    if KLLOSS:
        label_name = label_name.replace('_label', '_kldiv_label')
    if args.warm:
        label_name = label_name.replace('.npy', '_warm.npy')
    np.save(label_name, label)
def show_labels(args):
    """Show the cluster-label assignments of four methods on a 2x2 grid.

    Renders the MoE-cost, MoE-gate, k-means-3, and k-means-5 label arrays as
    images over the reshaped (61 x 21) state grid spanning
    theta in [0, 2*pi], omega in [-2, 2], then saves the figure to
    gallery/pen/pen_label_assign.pdf.

    :param args: parsed CLI arguments, used to resolve the data configuration.
    """
    # FIX: cfg was referenced below but never defined; derive it from args as
    # every sibling function in this file does.
    cfg, lbl = util.get_label_cfg_by_args(args)
    pld.setGlobalFontSize(16)
    fig, ax = plt.subplots(2, 2, figsize=(8, 6))
    titles = ['MoE I', 'MoE II', '$k$-Means-3', '$k$-Means-5']
    moelbl = np.load('data/pen/moe_label.npy')
    gatelbl = np.load('data/pen/gate_expert_label.npy')
    kmeanlbl = np.load('data/pen/pca_kmean_label.npz')
    k3lbl = kmeanlbl['3']
    k5lbl = kmeanlbl['5']
    data = npload(cfg['file_path'], cfg['uniqueid'])
    x0 = data[cfg['x_name']]
    # Shared colormap/normalization so cluster indices map to the same colors
    # across all four panels.
    cm = plt.get_cmap('jet')
    norm = mpl.colors.Normalize(0, 5)

    def show_label_on_axis(axis, lbl_arr):
        """Draw a label array as an image over the (theta, omega) grid."""
        axis.imshow(np.reshape(lbl_arr, (61, 21)).T, cmap=cm, origin='lower',
                    norm=norm, extent=[0, 2 * np.pi, -2.0, 2.0])

    panels = [(ax[0][0], moelbl), (ax[0][1], gatelbl),
              (ax[1][0], k3lbl), (ax[1][1], k5lbl)]
    for (axis, lbl_arr), title in zip(panels, titles):
        show_label_on_axis(axis, lbl_arr)
        axis.set_title(title)
    # Only the outer edges get axis labels; interior tick labels are hidden.
    ax[1][0].set_xlabel(r'$\theta$')
    ax[1][1].set_xlabel(r'$\theta$')
    ax[0][0].set_xticklabels([])
    ax[0][1].set_xticklabels([])
    ax[0][0].set_ylabel(r'$\omega$')
    ax[1][0].set_ylabel(r'$\omega$')
    ax[0][1].set_yticklabels([])
    ax[1][1].set_yticklabels([])
    fig.tight_layout()
    fig.savefig('gallery/pen/pen_label_assign.pdf')
    plt.show()
def eval_final_label(args):
    """Assign an expert label to every training sample via the MoE gate.

    Loads the trained MoE model, evaluates the gate probabilities on the
    training inputs, and saves the argmax labels to data/pen/moe_label.npy.

    NOTE(review): this redefines eval_final_label from earlier in the file,
    shadowing the gate-expert variant; consider renaming one of them.

    :param args: parsed CLI arguments, used to resolve the data configuration.
    """
    cfg, lbl = util.get_label_cfg_by_args(args)
    uid = cfg['uniqueid']
    print('We are playing with %s' % uid)
    model_path = os.path.join('models/%s/moe' % uid, 'moe_model.pt')
    predict = get_moe_loader(model_path, True)
    raw = npload(cfg['file_path'], uid)
    prob, _ = predict(raw[cfg['x_name']])
    np.save('data/pen/moe_label.npy', np.argmax(prob, axis=1))
def run_the_training(args):
    """Train a mixture-of-models network that discovers clusters on its own.

    With args.eval set, skips training and instead loads the saved model and
    returns a dict with inputs, targets, and predictions.

    NOTE(review): a second run_the_training defined later in this file shadows
    this one; consider renaming.

    :param args: parsed CLI arguments; uses args.k, args.eval, args.prob.
    :return: dict with keys 'x', 'y', 'predy' in eval mode, otherwise None.
    """
    cfg, lbl = util.get_label_cfg_by_args(args)
    uid = cfg['uniqueid']
    print('We are playing with %s' % uid)
    raw = npload(cfg['file_path'], uid)
    feed = {'x': raw[cfg['x_name']], 'y': raw[cfg['y_name']]}
    in_dim = feed['x'].shape[1]
    out_dim = feed['y'].shape[1]
    n_expert = args.k
    # Gate is [in, 100, k]; the experts split a ~300-unit hidden budget evenly.
    hidden = int(np.ceil(300.0 / n_expert))
    net = MoMNet([in_dim, 100, n_expert],
                 [[in_dim, hidden, out_dim] for _ in range(n_expert)])
    net.argmax = False
    config = genTrainConfig(outdir='models/%s/mom' % uid, outname='mom_model.pt', overwrite=False)
    if args.eval:
        model_path = os.path.join(config['outdir'], config['outname'])
        predict = momLoader(model_path, withclus=args.prob, argmax=False)
        return {'x': feed['x'], 'y': feed['y'], 'predy': predict(feed['x'])}
    trainOne(config, feed, net=net)
def run_the_training(args, clus=None, expert=None):
    """Run the MoE training without using any clustering information but let it find it on its own.

    Jointly trains a gate network (``clus``) and a set of expert networks in
    an EM-like alternation per batch: first the experts are updated against a
    negative log-likelihood of the gate-weighted posterior, then the gate is
    updated with either a KL divergence to the (detached) posterior (KLLOSS
    set) or a smooth-L1 loss on the mixture prediction.  Early-stops when the
    test losses have not improved for back_check_epoch epochs, then saves the
    gate, experts, and data-scaling statistics to disk.

    :param args: parsed CLI arguments; uses args.warm for output file naming.
    :param clus: optional pre-built gate network; a default GaoNet with 5
        experts is created when None.
    :param expert: optional pre-built experts — presumably supplied together
        with clus (only built here when clus is None); verify against callers.
    """
    # load data
    cfg, lbl = util.get_label_cfg_by_args(args)
    uid = cfg['uniqueid']
    print('We are playing with %s' % uid)
    data = npload(cfg['file_path'], uid)
    data_feed = {'x': data[cfg['x_name']], 'y': data[cfg['y_name']]}
    dimx = data_feed['x'].shape[1]
    dimy = data_feed['y'].shape[1]
    # create gate and expert (defaults: 5 experts, 60 hidden units each)
    if clus is None:
        n_model = 5
        clus = GaoNet([dimx, 100, n_model])
        expert = Experts([[dimx, 60, dimy]] * n_model)
    # cuda it
    clus.cuda()
    expert.cuda()
    # set data loader; scaling statistics are computed inside the factory
    xname, yname = 'x', 'y'
    factory = KeyFactory(data_feed, xname, yname, scalex=True, scaley=True)
    factory.shuffle(None)
    # visualize the (initial) gate decision regions before training
    draw_clus_region(clus, data_feed['x'], factory)
    # create two sets: 80/20 train/test split of the shuffled data
    trainsize = 0.8
    trainSet = SubFactory(factory, 0.0, trainsize)
    testSet = SubFactory(factory, trainsize, 1.0)
    batch_size = 32
    test_batch_size = -1  # presumably -1 means "whole set in one batch" — verify in DataLoader
    trainLder = DataLoader(trainSet, batch_size=batch_size, shuffle=False)
    testLder = DataLoader(testSet, batch_size=test_batch_size, shuffle=False)
    # set up file output; name encodes the loss variant and warm-start flag
    outname = 'gate_expert_model.pt'
    outdir = 'models/pen/gate_expert'
    if KLLOSS:
        outname = 'gate_expert_kldiv_model.pt'
    if args.warm:
        outname = outname.replace('.pt', '_warm.pt')
    # set optimizer: separate Adam instances so gate and experts step independently
    lr = 1e-3
    opt_G = torch.optim.Adam(clus.parameters(), lr=lr)
    opt_E = torch.optim.Adam(expert.parameters(), lr=lr)
    # set other training stuff
    n_epoch = 500
    back_check_epoch = 8  # early-stop patience, in epochs
    best_test_loss = np.inf
    best_test_loss_expert = np.inf
    best_test_epoch = 0

    def get_mean_error(g_y, exp_y, feedy):
        """Calculate two losses (expert NLL, gate loss) from raw gate logits
        g_y and per-expert predictions exp_y against targets feedy."""
        # per-(sample, expert) mean squared trajectory error
        error_traj = torch.mean((exp_y - feedy.expand_as(exp_y))**2, dim=2).t()
        g = f.softmax(g_y)
        log_g = f.log_softmax(g_y)
        # b by r probability, not scaled to 1
        posterior = g * torch.exp(-0.5 * error_traj)
        # negative log marginal likelihood of each sample's trajectory
        traj_prob = torch.mean(-torch.log(torch.sum(posterior, dim=1)))
        if KLLOSS:
            # normalize the posterior; wrap the .data so no gradient flows
            # through the target of the KL divergence
            posterior_scale = Variable(
                (posterior / torch.sum(posterior, dim=1, keepdim=True)
                 ).data)  # do not use gradient of it
            div_error = f.kl_div(log_g, posterior_scale)
            return traj_prob, div_error
        else:
            # mixture prediction: gate-weighted sum of expert outputs
            Og = torch.sum(exp_y * g.t().unsqueeze(2), dim=0)
            traj_error = f.smooth_l1_loss(Og, feedy)
            return traj_prob, traj_error

    # start training
    # NOTE(review): Variable/volatile usage targets the pre-0.4 PyTorch API.
    for epoch in range(n_epoch):
        sum_train_loss = 0
        sum_train_loss_prob = 0
        for idx, batch_data in enumerate(trainLder):
            feedy = Variable(batch_data[yname], requires_grad=False).cuda()
            feedx = Variable(batch_data[xname], requires_grad=False).cuda()
            # train experts: NLL of the gate-weighted posterior
            opt_E.zero_grad()
            opt_G.zero_grad()
            exp_y = expert(feedx)
            g_y = clus(feedx)
            g = f.softmax(g_y)  # this is prior
            log_g = f.log_softmax(g_y)
            error_traj = torch.mean((exp_y - feedy.expand_as(exp_y))**2, dim=2).t()
            # b by r probability, not scaled to 1
            posterior = g * torch.exp(-0.5 * error_traj)
            posterior_scale = Variable(
                (posterior / torch.sum(posterior, dim=1, keepdim=True)
                 ).data)  # do not use gradient of it
            lossi = torch.mean(-torch.log(torch.sum(posterior, dim=1)))
            # retain_graph: the same forward graph is reused for the gate
            # update (error.backward()) below
            lossi.backward(retain_graph=True)
            sum_train_loss_prob += lossi.cpu().data.numpy() * feedx.size()[0]
            opt_E.step()
            # update h by regression error (gate step)
            all_pred = exp_y
            if KLLOSS:
                error = f.kl_div(log_g, posterior_scale)
            else:
                Og_before = all_pred * g.t().unsqueeze(2)
                Og = torch.sum(Og_before, dim=0)
                error = f.smooth_l1_loss(Og, feedy)
            sum_train_loss += error.cpu().data.numpy() * feedx.size()[0]
            error.backward()
            opt_G.step()
            # val = clus.printWeights(3)
        mean_train_loss = sum_train_loss / trainLder.getNumData()
        mean_train_loss_prob = sum_train_loss_prob / trainLder.getNumData()
        # evaluate on test data
        sum_test_loss_gate = 0
        sum_test_loss_expert = 0
        n_test_data = testLder.getNumData()
        for idx, batch_data in enumerate(testLder):
            feedy = Variable(batch_data[yname], volatile=True).cuda()
            feedx = Variable(batch_data[xname], volatile=True).cuda()
            exp_y = expert(feedx)
            g_y = clus(feedx)
            traj_prob, div_error = get_mean_error(g_y, exp_y, feedy)
            sum_test_loss_gate += div_error.cpu().data.numpy() * feedx.size()[0]
            sum_test_loss_expert += traj_prob.cpu().data.numpy() * feedx.size()[0]
        mean_test_loss_gate = sum_test_loss_gate / n_test_data
        mean_test_loss_expert = sum_test_loss_expert / n_test_data
        print('epoch %d gate loss %f expert loss %f test gate loss %f expert loss %f' \
            % (epoch, mean_train_loss, mean_train_loss_prob, mean_test_loss_gate, mean_test_loss_expert))
        # track the best epoch for either loss; stop after back_check_epoch
        # epochs without improvement
        if mean_test_loss_gate < best_test_loss:
            best_test_loss = mean_test_loss_gate
            best_test_epoch = epoch
        if mean_test_loss_expert < best_test_loss_expert:
            best_test_loss_expert = mean_test_loss_expert
            best_test_epoch = epoch
        if epoch > best_test_epoch + back_check_epoch:
            break
    print('Save model now')
    # draw region for classifier after training
    draw_clus_region(clus, data_feed['x'], factory)
    # move back to CPU so the pickled model loads without CUDA
    clus.cpu()
    expert.cpu()
    model = {
        'gate': clus,
        'expert': expert,
        'xScale': [trainLder.xmean, trainLder.xstd],
        'yScale': [trainLder.ymean, trainLder.ystd]
    }
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    torch.save(model, os.path.join(outdir, outname))
def show_picky_states(cfg, lbl_name, args):
    """Select a few states and draw predictions.

    Ranks validation samples by SNN prediction error, then for several of the
    worst cases draws the SNN, MoE, and optimal trajectories side by side with
    the neighboring training trajectories colored by cluster label, saving
    each figure to gallery/car/car_snn_vs_moe_traj.pdf.

    :param cfg: data configuration dict (file paths and array key names).
    :param lbl_name: label-set name — NOTE(review): immediately overwritten by
        the hard-coded 'pca_kmean_label' below, so the argument is ignored.
    :param args: parsed CLI arguments (unused here).
    """
    uid = cfg['uniqueid']
    lbl_name = 'pca_kmean_label'
    # load all training data and validation data
    data = npload(cfg['file_path'], uid)
    xname, yname = cfg['x_name'], cfg['y_name']
    datax, datay = data[xname], data[yname]
    # create a query instance (nearest-neighbor lookup in scaled input space)
    query = Query(datax, scale=True)
    vdata = np.load(cfg['valid_path'])
    vx, vy = vdata[xname], vdata[yname]
    # snn model
    snn_fun = modelLoader(cfg['snn_path'])
    # moe model: pick the 10-cluster classifier/regressor pair
    result = util.get_clus_reg_by_dir('models/%s/%s' % (uid, lbl_name))
    cls, regs = result[10]  # let me try this one
    net = MoMNet(cls, regs)
    # load cluster labels for the same 10-cluster assignment
    lbl_data_dct = np.load('data/%s/%s.npz' % (uid, lbl_name))
    label = lbl_data_dct['10']
    # eval snn on validation set and extract the one with largest prediction error
    pred_vy = snn_fun(vx)
    diff_vy = pred_vy - vy
    error_y = np.linalg.norm(diff_vy, axis=1)
    error_order = np.argsort(error_y)  # ascending; worst cases are at the end
    # walk the 8th- through 20th-worst validation samples
    for i in range(7, 20):
        vx_idx = error_order[-1 - i]
        bad_x0 = vx[vx_idx]
        bad_sol = vy[vx_idx]
        snn_pred = pred_vy[vx_idx]
        moe_pred = net.getPredY(bad_x0)
        # parseX extracts the state trajectory from the flat solution vector
        predX, _, _ = parseX(snn_pred)
        realX, _, _ = parseX(bad_sol)
        predXMoE, _, _ = parseX(moe_pred)
        # get neighbors of the bad initial state in the training set
        index = query.getIndex(bad_x0)
        print('index ', index, 'label ', label[index])
        # draw them: left panel = predictions vs optimum, right panel = neighbors
        fig, axes = plt.subplots(1, 2)
        shown_cluster = []
        for ind in index:
            nnX, _, _ = parseX(datay[ind])
            # attach a legend entry only the first time each cluster appears
            if label[ind] not in shown_cluster:
                axes[1].plot(nnX[:, 0], nnX[:, 1], color='C%d' % label[ind],
                             label='Cluster %d' % label[ind])
                shown_cluster.append(label[ind])
            else:
                axes[1].plot(nnX[:, 0], nnX[:, 1], color='C%d' % label[ind])
        axes[0].plot(predX[:, 0], predX[:, 1], color='#ff7f0e', linewidth=2,
                     ls='--', label='SNN')
        axes[0].plot(predXMoE[:, 0], predXMoE[:, 1], color='g', linewidth=2,
                     ls='--', label='MoE')
        axes[0].plot(realX[:, 0], realX[:, 1], color='k', linewidth=2,
                     label='Opt.')
        # arrow showing the final heading of the SNN trajectory
        finalAgl = predX[-1, 2]
        direc = [1*np.sin(finalAgl), 1*np.cos(finalAgl)]
        xf = predX[-1]
        # decorate both panels; NOTE(review): this inner i shadows the outer
        # loop variable (harmless here since the outer i is reassigned each
        # iteration, but confusing)
        for i in range(2):
            ax = axes[i]
            if i == 0:
                # SNN final-heading arrow only on the prediction panel
                ax.arrow(xf[0], xf[1], direc[0], direc[1], color='#ff7f0e',
                         linewidth=2, width=0.1)
            # MoE final-heading arrow
            finalAgl = predXMoE[-1, 2]
            direc = [1*np.sin(finalAgl), 1*np.cos(finalAgl)]
            xf = predXMoE[-1]
            ax.arrow(xf[0], xf[1], direc[0], direc[1], color='g', linewidth=2,
                     width=0.1)
            # goal and start markers with per-panel annotation offsets
            ax.scatter(0, 0, s=50, color='r')
            ax.annotate('Goal', (0, 0), xytext=(0.2, 0.2), textcoords='data')
            ax.scatter(bad_x0[0], bad_x0[1], s=50, color='k', marker='*')
            if i == 0:
                ax.annotate('Start', (bad_x0[0], bad_x0[1]),
                            xytext=(-1 + bad_x0[0], -0.8 + bad_x0[1]),
                            textcoords='data')
            else:
                ax.annotate('Start', (bad_x0[0], bad_x0[1]),
                            xytext=(bad_x0[0], 0.3 + bad_x0[1]),
                            textcoords='data')
            ax.set_xlabel(r'$x$')
            ax.axis('equal')
            if i == 0:
                xlim = ax.get_xlim()
                ax.set_ylabel(r'$y$')
            if i == 0:
                ax.legend()
            else:
                ax.legend(loc=4)
            # widen the x-limits so arrows/annotations are not clipped
            if i == 0:
                ax.set_xlim(-2.5, xlim[1] + 1)
            else:
                xlim = ax.get_xlim()
                ax.set_xlim(xlim[0] - 1, xlim[1] + 1.5)
        fig.tight_layout()
        # NOTE(review): saved inside the loop, so the file keeps only the last
        # figure; plt.show() blocks once per candidate state
        fig.savefig('gallery/car/car_snn_vs_moe_traj.pdf')
        plt.show()