def bundle_entropy(self, func, obs):
    act = np.ones((obs.shape[0], self.dimA)) * 0.5

    def fg(x):
        value, grad = func(obs, 2 * x - 1)
        grad *= 2
        return value, grad

    act = bundle_entropy.solveBatch(fg, act)[0]
    act = 2 * act - 1

    return act
def get_cvx_opt(self, func, cz1, cz2, cz3):
    act = np.ones((cz1.shape[0], self.dimA)) * 0.5

    def fg(x):
        value, grad = func(2 * x - 1, cz1, cz2, cz3)
        grad *= 2
        return value, grad

    act = bundle_entropy.solveBatch(fg, act)[0]
    act = 2 * act - 1

    return act
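# The two methods above run the bundle-entropy solver over the box [0, 1]^dimA
# and map the result back to the action space [-1, 1]^dimA via a = 2x - 1; the
# `grad *= 2` is the chain rule dE/dx = 2 * dE/da for that substitution.
# A minimal sketch of the same rescaling on a toy quadratic energy (everything
# here -- `toy_energy`, `a_star` -- is hypothetical and only for illustration):
import numpy as np

a_star = np.array([[0.3, -0.7]])           # toy minimizer in [-1, 1]^2

def toy_energy(a):
    value = 0.5 * np.sum((a - a_star) ** 2, axis=1)
    grad = a - a_star                      # dE/da
    return value, grad

def toy_fg(x):
    value, grad = toy_energy(2 * x - 1)    # substitute a = 2x - 1
    return value, 2 * grad                 # dE/dx = 2 * dE/da

# Finite-difference check of the rescaled gradient at an interior point.
x = np.array([[0.6, 0.4]])
eps = 1e-6
num = (toy_fg(x + [[eps, 0.]])[0] - toy_fg(x - [[eps, 0.]])[0]) / (2 * eps)
print(num[0], toy_fg(x)[1][0, 0])          # both ≈ 2 * (2*0.6 - 1 - 0.3) = -0.2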
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('chkpt', type=str)
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--layerSizes', type=int, nargs='+', default=[600, 600])
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--dataset', type=str,
                        choices=['bibtex', 'bookmarks', 'delicious'],
                        default='bibtex')
    args = parser.parse_args()

    setproctitle.setproctitle('bamos.icnn.ebundle-vs-gd.{}.{}'.format(
        args.dataset, ','.join(str(x) for x in args.layerSizes)))

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    if args.dataset == 'bibtex':
        data = bibsonomy.loadBibtex("data/bibtex")
    elif args.dataset == 'bookmarks':
        data = bibsonomy.loadBookmarks("data/bookmarks")
    elif args.dataset == 'delicious':
        data = bibsonomy.loadDelicious("data/delicious")
    else:
        assert False

    nTest = data['testX'].shape[0]
    nFeatures = data['trainX'].shape[1]
    nLabels = data['trainY'].shape[1]
    nXy = nFeatures + nLabels
    nTrain = data['trainX'].shape[0]

    trainX = data['trainX']
    trainY = data['trainY']

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = icnn_ebundle.Model(nFeatures, nLabels, args.layerSizes, sess)
        model.load(args.chkpt)

        nSamples = 10

        # Bundle Entropy
        bundleIter, bundleTime, bundleEs = [], [], []

        def fg(yhats):
            fd = {model.x_: xBatch, model.y_: yhats}
            e, ge = model.sess.run([model.E_, model.dE_dy_], feed_dict=fd)
            return e, ge

        def cb(iterNum, es, x):
            bundleIter.append(iterNum)
            print(np.mean(es))
            es_entr = es - entr(x)
            bundleEs.append(np.mean(es_entr))
            bundleTime.append(time.time() - start)

        start = time.time()
        I = npr.randint(nTrain, size=nSamples)
        xBatch = trainX[I, :]
        yBatch = trainY[I, :]
        y0 = np.full(yBatch.shape, 0.5)
        yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(
            fg, y0, nIter=10, callback=cb)

        # PGD
        pgdIter, pgdTime, pgdEs = {}, {}, {}

        def fg(yhats):
            fd = {model.x_: xBatch, model.y_: yhats}
            e, ge = model.sess.run([model.E_entr_, model.dE_entr_dy_],
                                   feed_dict=fd)
            return e, ge

        def proj(x):
            return np.clip(x, 1e-6, 1. - 1e-6)

        lrs = [0.1, 0.01, 0.001]
        for lr in lrs:
            pgdIter[lr] = []
            pgdTime[lr] = []
            pgdEs[lr] = []

            def cb(iterNum, es, gs, bestM):
                pgdIter[lr].append(iterNum)
                pgdEs[lr].append(np.mean(es))
                pgdTime[lr].append(time.time() - start)

            start = time.time()
            y0 = np.full(yBatch.shape, 0.5)
            bamos_opt.pgd.solve_batch(fg, proj, y0, lr=lr, rollingDecay=0.5,
                                      eps=1e-3, minIter=10, maxIter=10,
                                      callback=cb)

        fig, ax = plt.subplots(1, 1)
        plt.xlabel('Iteration')
        plt.ylabel('Entropy-Scaled Objective')
        for lr in lrs:
            plt.plot(pgdIter[lr], pgdEs[lr], label='PGD, lr={}'.format(lr))
        plt.plot(bundleIter, bundleEs, label='Bundle Entropy',
                 color='k', linestyle='dashed')
        plt.legend()
        ax.set_yscale('log')
        for ext in ['png', 'pdf']:
            fname = os.path.join(args.save, 'obj.' + ext)
            plt.savefig(fname)
            print("Created {}".format(fname))
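# The bundle-entropy callback above reports the entropy-regularized objective
# E(y) - H(y) by subtracting `entr(x)` from the raw energies `es`. The `entr`
# helper is not part of this listing; a plausible implementation (an
# assumption, not necessarily the repository's exact code) is the per-example
# binary entropy summed over coordinates:
import numpy as np

def entr(x):
    # Clip away from 0 and 1 so log() stays finite at the box boundary.
    x = np.clip(x, 1e-12, 1. - 1e-12)
    return -np.sum(x * np.log(x) + (1. - x) * np.log(1. - x), axis=1)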
def train(self, args, trainX, trainY, valX, valY):
    save = args.save

    nTrain = trainX.shape[0]
    nTest = valX.shape[0]

    nIter = int(args.nEpoch * np.ceil(nTrain / args.trainBatchSz))

    trainFields = ['iter', 'f1', 'loss']
    trainF = open(os.path.join(save, 'train.csv'), 'w')
    trainW = csv.writer(trainF)
    trainW.writerow(trainFields)

    testFields = ['iter', 'f1', 'loss']
    testF = open(os.path.join(save, 'test.csv'), 'w')
    testW = csv.writer(testF)
    testW.writerow(testFields)

    self.trainWriter = tf.train.SummaryWriter(os.path.join(save, 'train'),
                                              self.sess.graph)
    self.sess.run(tf.initialize_all_variables())

    nParams = np.sum(v.get_shape().num_elements()
                     for v in tf.trainable_variables())

    meta = {'nTrain': nTrain, 'trainBatchSz': args.trainBatchSz,
            'nParams': nParams, 'nEpoch': args.nEpoch, 'nIter': nIter}
    metaP = os.path.join(save, 'meta.json')
    with open(metaP, 'w') as f:
        json.dump(meta, f, indent=2)

    self.sess.run(self.makeCvx)

    bestTestF1 = 0.0
    nErrors = 0
    for i in range(nIter):
        tflearn.is_training(True)

        print("=== Iteration {} (Epoch {:.2f}) ===".format(
            i, i / np.ceil(nTrain / args.trainBatchSz)))
        start = time.time()
        I = npr.randint(nTrain, size=args.trainBatchSz)
        xBatch = trainX[I, :]
        yBatch = trainY[I, :]

        def fg(yhats):
            fd = {self.x_: xBatch, self.y_: yhats}
            e, ge = self.sess.run([self.E_, self.dE_dy_], feed_dict=fd)
            return e, ge

        y0 = np.full(yBatch.shape, 0.5)
        try:
            yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(
                fg, y0, nIter=args.inference_nIter)
        except:
            print("Warning: Exception in bundle_entropy.solveBatch")
            nErrors += 1
            if nErrors > 10:
                print("More than 10 errors raised, quitting")
                sys.exit(-1)
            continue

        nActive = [len(Gi) for Gi in G]
        l_yN = crossEntr(yBatch, yN)
        trainF1 = util.macroF1(yBatch, yN)

        fd = self.train_step_fd(args.trainBatchSz, xBatch, yBatch, G,
                                yN, ys, lam)
        # fd[self.l_yN_] = l_yN
        # fd[self.nBundleIter_] = nIters
        # fd[self.nActive_] = nActive
        summary, _ = self.sess.run([self.merged, self.train_step],
                                   feed_dict=fd)
        if len(self.proj) > 0:
            self.sess.run(self.proj)
        else:
            print("Warning: Not projecting any weights.")

        self.trainWriter.add_summary(summary, i)
        trainW.writerow((i, trainF1, l_yN))
        trainF.flush()

        print(" + trainF1: {:0.2f}".format(trainF1))
        print(" + loss: {:0.5e}".format(l_yN))
        print(" + time: {:0.2f} s".format(time.time() - start))

        if i % np.ceil(nTrain / args.trainBatchSz) == 0:
            print("=== Testing ===")
            tflearn.is_training(False)

            def fg(yhats):
                # Evaluate the energy at the candidate labels being optimized.
                fd = {self.x_: valX, self.y_: yhats}
                e, ge = self.sess.run([self.E_, self.dE_dy_], feed_dict=fd)
                return e, ge

            y0 = np.full(valY.shape, 0.5)
            yN, G, h, lam, ys, _ = bundle_entropy.solveBatch(
                fg, y0, nIter=args.inference_nIter)
            testF1 = util.macroF1(valY, yN)
            l_yN = crossEntr(valY, yN)
            print(" + testF1: {:0.4f}".format(testF1))
            testW.writerow((i, testF1, l_yN))
            testF.flush()

            if testF1 > bestTestF1:
                print('+ Saving best model.')
                self.save(os.path.join(args.save, 'best.tf'))
                bestTestF1 = testF1

            os.system('./icnn.plot.py ' + args.save)

    trainF.close()
    testF.close()

    meta['nErrors'] = nErrors
    with open(metaP, 'w') as f:
        json.dump(meta, f, indent=2)

    os.system('./icnn.plot.py ' + args.save)
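# The multi-label trainer above logs a `crossEntr` loss that is not shown in
# this listing. A minimal sketch, assuming it is the binary cross-entropy
# between the true label matrix and the relaxed predictions in (0, 1); the
# batch-mean reduction is an assumption and the repository's version may sum
# instead:
import numpy as np

def crossEntr(y, yhat):
    yhat = np.clip(yhat, 1e-12, 1. - 1e-12)    # keep log() finite
    return -np.mean(np.sum(y * np.log(yhat) + (1. - y) * np.log(1. - yhat),
                           axis=1))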
def train(self, args, trainX, trainY, valX, valY):
    save = args.save

    self.meanY = np.mean(trainY, axis=0)

    nTrain = trainX.shape[0]
    nTest = valX.shape[0]

    nIter = int(np.ceil(args.nEpoch * nTrain / args.trainBatchSz))

    trainFields = ['iter', 'loss']
    trainF = open(os.path.join(save, 'train.csv'), 'w')
    trainW = csv.writer(trainF)
    trainW.writerow(trainFields)
    trainF.flush()

    testFields = ['iter', 'loss']
    testF = open(os.path.join(save, 'test.csv'), 'w')
    testW = csv.writer(testF)
    testW.writerow(testFields)
    testF.flush()

    self.trainWriter = tf.train.SummaryWriter(os.path.join(save, 'train'),
                                              self.sess.graph)
    self.sess.run(tf.initialize_all_variables())
    if not args.noncvx:
        self.sess.run(self.makeCvx)

    nParams = np.sum(v.get_shape().num_elements()
                     for v in tf.trainable_variables())

    self.nBundleIter = args.nBundleIter
    meta = {'nTrain': nTrain, 'trainBatchSz': args.trainBatchSz,
            'nParams': nParams, 'nEpoch': args.nEpoch,
            'nIter': nIter, 'nBundleIter': self.nBundleIter}
    metaP = os.path.join(save, 'meta.json')
    with open(metaP, 'w') as f:
        json.dump(meta, f, indent=2)

    nErrors = 0
    maxErrors = 20
    for i in range(nIter):
        tflearn.is_training(True)

        print("=== Iteration {} (Epoch {:.2f}) ===".format(
            i, i / np.ceil(nTrain / args.trainBatchSz)))
        start = time.time()
        I = npr.randint(nTrain, size=args.trainBatchSz)
        xBatch = trainX[I, :]
        yBatch = trainY[I, :]
        yBatch_flat = yBatch.reshape((args.trainBatchSz, -1))
        xBatch_flipped = xBatch[:, :, ::-1, :]

        def fg(yhats):
            yhats_shaped = yhats.reshape([args.trainBatchSz] + self.outputSz)
            fd = {self.x_: xBatch_flipped, self.y_: yhats_shaped}
            e, ge = self.sess.run([self.E_, self.dE_dyFlat_], feed_dict=fd)
            return e, ge

        y0 = np.expand_dims(self.meanY, axis=0).repeat(args.trainBatchSz,
                                                       axis=0)
        y0 = y0.reshape((args.trainBatchSz, -1))
        try:
            yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(
                fg, y0, nIter=self.nBundleIter)
            yN_shaped = yN.reshape([args.trainBatchSz] + self.outputSz)
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            print("Warning: Exception in bundle_entropy.solveBatch")
            nErrors += 1
            if nErrors > maxErrors:
                print("More than {} errors raised, quitting".format(maxErrors))
                sys.exit(-1)
            continue

        nActive = [len(Gi) for Gi in G]
        l_yN = mse(yBatch_flat, yN)

        fd = self.train_step_fd(args.trainBatchSz, xBatch_flipped,
                                yBatch_flat, G, yN, ys, lam)
        fd[self.l_yN_] = l_yN
        fd[self.nBundleIter_] = nIters
        fd[self.nActive_] = nActive
        summary, _ = self.sess.run([self.merged, self.train_step],
                                   feed_dict=fd)
        if not args.noncvx and len(self.proj) > 0:
            self.sess.run(self.proj)

        saveImgs(xBatch, yN_shaped, "{}/trainImgs/{:05d}".format(args.save, i))

        self.trainWriter.add_summary(summary, i)
        trainW.writerow((i, l_yN))
        trainF.flush()

        print(" + loss: {:0.5e}".format(l_yN))
        print(" + time: {:0.2f} s".format(time.time() - start))

        if i % np.ceil(nTrain / (4.0 * args.trainBatchSz)) == 0:
            os.system('./icnn.plot.py ' + args.save)

        if i % np.ceil(nTrain / args.trainBatchSz) == 0:
            print("=== Testing ===")
            tflearn.is_training(False)

            y0 = np.expand_dims(self.meanY, axis=0).repeat(nTest, axis=0)
            y0 = y0.reshape((nTest, -1))
            valX_flipped = valX[:, :, ::-1, :]

            def fg(yhats):
                yhats_shaped = yhats.reshape([nTest] + self.outputSz)
                fd = {self.x_: valX_flipped, self.y_: yhats_shaped}
                e, ge = self.sess.run([self.E_, self.dE_dyFlat_],
                                      feed_dict=fd)
                return e, ge

            try:
                yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(
                    fg, y0, nIter=self.nBundleIter)
                yN_shaped = yN.reshape([nTest] + self.outputSz)
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                print("Warning: Exception in bundle_entropy.solveBatch")
                nErrors += 1
                if nErrors > maxErrors:
                    print("More than {} errors raised, quitting".format(
                        maxErrors))
                    sys.exit(-1)
                continue

            testMSE = mse(valY, yN_shaped)

            saveImgs(valX, yN_shaped,
                     "{}/testImgs/{:05d}".format(args.save, i))

            print(" + test loss: {:0.5e}".format(testMSE))
            testW.writerow((i, testMSE))
            testF.flush()

            self.save(os.path.join(args.ckptDir, '{:05d}.tf'.format(i)))

            os.system('./icnn.plot.py ' + args.save)

    trainF.close()
    testF.close()

    os.system('./icnn.plot.py ' + args.save)
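# The completion trainer above relies on two helpers that are not shown in
# this listing. Assumed sketches follow: the names match the calls above, but
# the exact behavior -- in particular the image layout written by `saveImgs`
# -- is a guess rather than the repository's implementation.
import os
import numpy as np
import matplotlib.pyplot as plt

def mse(y, yhat):
    # Mean squared error over all entries.
    return np.mean(np.square(np.asarray(y) - np.asarray(yhat)))

def saveImgs(xs, ys, prefix):
    # Write one grayscale montage per call, pairing each input with its
    # completed output side by side, to <prefix>.png; creates the output
    # directory if it does not exist yet.
    d = os.path.dirname(prefix)
    if d and not os.path.exists(d):
        os.makedirs(d)
    rows = [np.concatenate((x.squeeze(), y.squeeze()), axis=1)
            for x, y in zip(xs, ys)]
    plt.imsave(prefix + '.png', np.concatenate(rows, axis=0), cmap='gray')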
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('chkpt', type=str)
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--layerSizes', type=int, nargs='+', default=[600, 600])
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--dataset', type=str,
                        choices=['bibtex', 'bookmarks', 'delicious'],
                        default='bibtex')
    args = parser.parse_args()

    setproctitle.setproctitle('bamos.icnn.ebundle')

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    data = olivetti.load("data/olivetti")
    meanY = np.mean(data['trainY'], axis=0)

    nTrain = data['trainX'].shape[0]
    nTest = data['testX'].shape[0]

    inputSz = list(data['trainX'][0].shape)
    outputSz = list(data['trainY'][1].shape)

    imgDir = os.path.join(args.save, 'imgs')
    if not os.path.exists(imgDir):
        os.makedirs(imgDir)

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = icnn_ebundle.Model(inputSz, outputSz, sess)
        model.load(args.chkpt)

        nSamples = 1

        # Bundle Entropy
        bundleIter, bundleTime, bundleEs = [], [], []

        def fg(yhats):
            yhats_shaped = yhats.reshape([nSamples] + outputSz)
            fd = {model.x_: xBatch_flipped, model.y_: yhats_shaped}
            e, ge = sess.run([model.E_, model.dE_dyFlat_], feed_dict=fd)
            return e, ge

        def cb(iterNum, es, x):
            yhats_shaped = x.reshape([nSamples] + outputSz)
            plt.imsave(os.path.join(imgDir, '{:05d}.png'.format(iterNum)),
                       yhats_shaped.squeeze(), cmap=mpl.cm.gray)

            bundleIter.append(iterNum)
            es_entr = es - entr(x)
            bundleEs.append(np.mean(es_entr))
            bundleTime.append(time.time() - start)

        start = time.time()
        I = npr.randint(nTrain, size=nSamples)
        # xBatch = data['trainX'][I, :]
        # yBatch = data['trainY'][I, :]
        xBatch = data['testX'][[0], :]
        yBatch = data['testY'][[0], :]
        xBatch_flipped = xBatch[:, :, ::-1, :]

        y0 = np.expand_dims(meanY, axis=0).repeat(nSamples, axis=0)
        y0 = y0.reshape((nSamples, -1))
        yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(
            fg, y0, nIter=30, callback=cb)
        yN_shaped = yN.reshape([nSamples] + outputSz)

        # PGD
        pgdIter, pgdTime, pgdEs = {}, {}, {}

        def fg(yhats):
            yhats_shaped = yhats.reshape([nSamples] + outputSz)
            fd = {model.x_: xBatch_flipped, model.y_: yhats_shaped}
            e, ge = sess.run([model.E_entr_, model.dE_entr_dyFlat_],
                             feed_dict=fd)
            return e, ge

        def proj(x):
            return np.clip(x, 1e-6, 1. - 1e-6)

        lrs = [0.1, 0.01, 0.001]
        for lr in lrs:
            pgdIter[lr] = []
            pgdTime[lr] = []
            pgdEs[lr] = []

            def cb(iterNum, es, gs, bestM):
                pgdIter[lr].append(iterNum)
                pgdEs[lr].append(np.mean(es))
                pgdTime[lr].append(time.time() - start)

            start = time.time()
            y0 = np.expand_dims(meanY, axis=0).repeat(nSamples, axis=0)
            y0 = y0.reshape((nSamples, -1))
            bamos_opt.pgd.solve_batch(fg, proj, y0, lr=lr, rollingDecay=0.5,
                                      eps=1e-3, minIter=50, maxIter=50,
                                      callback=cb)

        fig, ax = plt.subplots(1, 1)
        plt.xlabel('Iteration')
        plt.ylabel('Entropy-Scaled Objective')
        for lr in lrs:
            plt.plot(pgdIter[lr], pgdEs[lr], label='PGD, lr={}'.format(lr))
        plt.plot(bundleIter, bundleEs, label='Bundle Entropy',
                 color='k', linestyle='dashed')
        plt.legend()
        # ax.set_yscale('log')
        for ext in ['png', 'pdf']:
            fname = os.path.join(args.save, 'obj.' + ext)
            plt.savefig(fname)
            print("Created {}".format(fname))
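# The listings above omit their module-level imports. Based on the names they
# reference, a plausible preamble for the completion experiment script would be
# the following; the project-local modules (olivetti, icnn_ebundle,
# bundle_entropy, bamos_opt) come from the surrounding repository, and the
# mpl.use('Agg') call is an assumption so the figures render without a display.
import argparse
import os
import time

import numpy as np
import numpy.random as npr
import tensorflow as tf
import setproctitle

import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt

import bamos_opt
import bundle_entropy
import icnn_ebundle
import olivetti


if __name__ == '__main__':
    main()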