Example #1
    def bundle_entropy(self, func, obs):
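        # Infer the optimal action via the bundle-entropy solver. The solver
        # operates over [0, 1]^dimA while actions live in [-1, 1]^dimA, so we
        # reparameterize with x = (act + 1) / 2 and rescale the gradient
        # accordingly.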
        act = np.ones((obs.shape[0], self.dimA)) * 0.5
        def fg(x):
            value, grad = func(obs, 2 * x - 1)
            grad *= 2
            return value, grad

        act = bundle_entropy.solveBatch(fg, act)[0]
        act = 2 * act - 1

        return act
Example #2
    def get_cvx_opt(self, func, cz1, cz2, cz3):
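        # Same bundle-entropy inference as bundle_entropy() above, but the
        # energy oracle takes cz1, cz2, cz3 (presumably cached intermediate
        # network values) instead of raw observations.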
        act = np.ones((cz1.shape[0], self.dimA)) * 0.5
        def fg(x):
            value, grad = func(2 * x - 1, cz1, cz2, cz3)
            grad *= 2
            return value, grad

        act = bundle_entropy.solveBatch(fg, act)[0]
        act = 2 * act - 1

        return act
Example #3
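# Assumed imports for this standalone script (not shown in the snippet):
# argparse, os, time, numpy as np, numpy.random as npr, tensorflow as tf,
# matplotlib.pyplot as plt, setproctitle, an entr() entropy helper, and the
# project-local modules bibsonomy, bundle_entropy, bamos_opt, and icnn_ebundle.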
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('chkpt', type=str)
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--layerSizes',
                        type=int,
                        nargs='+',
                        default=[600, 600])
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--dataset',
                        type=str,
                        choices=['bibtex', 'bookmarks', 'delicious'],
                        default='bibtex')

    args = parser.parse_args()

    setproctitle.setproctitle('bamos.icnn.ebundle-vs-gd.{}.{}'.format(
        args.dataset, ','.join(str(x) for x in args.layerSizes)))

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    if args.dataset == 'bibtex':
        data = bibsonomy.loadBibtex("data/bibtex")
    elif args.dataset == 'bookmarks':
        data = bibsonomy.loadBookmarks("data/bookmarks")
    elif args.dataset == 'delicious':
        data = bibsonomy.loadDelicious("data/delicious")
    else:
        assert False

    nTest = data['testX'].shape[0]
    nFeatures = data['trainX'].shape[1]
    nLabels = data['trainY'].shape[1]
    nXy = nFeatures + nLabels

    nTrain = data['trainX'].shape[0]
    trainX = data['trainX']
    trainY = data['trainY']

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = icnn_ebundle.Model(nFeatures, nLabels, args.layerSizes, sess)
        model.load(args.chkpt)

        nSamples = 10

        # Bundle Entropy
        bundleIter, bundleTime, bundleEs = [], [], []

        def fg(yhats):
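            # Oracle for the bundle-entropy solver: returns the batch
            # energies E(x, yhat) and their gradients with respect to yhat.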
            fd = {model.x_: xBatch, model.y_: yhats}
            e, ge = model.sess.run([model.E_, model.dE_dy_], feed_dict=fd)
            return e, ge

        def cb(iterNum, es, x):
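            # Per-iteration callback: record the objective adjusted by the
            # entropy term, es - entr(x), and the elapsed wall-clock time.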
            bundleIter.append(iterNum)
            print(np.mean(es))
            es_entr = es - entr(x)
            bundleEs.append(np.mean(es_entr))
            bundleTime.append(time.time() - start)

        start = time.time()
        I = npr.randint(nTrain, size=nSamples)
        xBatch = trainX[I, :]
        yBatch = trainY[I, :]
        y0 = np.full(yBatch.shape, 0.5)
        yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(fg,
                                                              y0,
                                                              nIter=10,
                                                              callback=cb)

        # PGD
        pgdIter, pgdTime, pgdEs = {}, {}, {}

        def fg(yhats):
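            # For PGD, the model's entropy-augmented energy (E_entr_) and its
            # gradient are used directly, so the solver only needs a
            # projection onto the box.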
            fd = {model.x_: xBatch, model.y_: yhats}
            e, ge = model.sess.run([model.E_entr_, model.dE_entr_dy_],
                                   feed_dict=fd)
            return e, ge

        def proj(x):
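            # Project onto the interior of the unit box so the log terms in
            # the entropy-regularized objective stay finite.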
            return np.clip(x, 1e-6, 1. - 1e-6)

        lrs = [0.1, 0.01, 0.001]
        for lr in lrs:
            pgdIter[lr] = []
            pgdTime[lr] = []
            pgdEs[lr] = []

            def cb(iterNum, es, gs, bestM):
                pgdIter[lr].append(iterNum)
                pgdEs[lr].append(np.mean(es))
                pgdTime[lr].append(time.time() - start)

            start = time.time()
            y0 = np.full(yBatch.shape, 0.5)
            bamos_opt.pgd.solve_batch(fg,
                                      proj,
                                      y0,
                                      lr=lr,
                                      rollingDecay=0.5,
                                      eps=1e-3,
                                      minIter=10,
                                      maxIter=10,
                                      callback=cb)

        fig, ax = plt.subplots(1, 1)
        plt.xlabel('Iteration')
        plt.ylabel('Entropy-Scaled Objective')
        for lr in lrs:
            plt.plot(pgdIter[lr], pgdEs[lr], label='PGD, lr={}'.format(lr))
        plt.plot(bundleIter,
                 bundleEs,
                 label='Bundle Entropy',
                 color='k',
                 linestyle='dashed')
        plt.legend()
        ax.set_yscale('log')
        for ext in ['png', 'pdf']:
            fname = os.path.join(args.save, 'obj.' + ext)
            plt.savefig(fname)
            print("Created {}".format(fname))
Example #4
    def train(self, args, trainX, trainY, valX, valY):
        save = args.save

        nTrain = trainX.shape[0]
        nTest = valX.shape[0]

        nIter = int(args.nEpoch * np.ceil(nTrain / args.trainBatchSz))

        trainFields = ['iter', 'f1', 'loss']
        trainF = open(os.path.join(save, 'train.csv'), 'w')
        trainW = csv.writer(trainF)
        trainW.writerow(trainFields)

        testFields = ['iter', 'f1', 'loss']
        testF = open(os.path.join(save, 'test.csv'), 'w')
        testW = csv.writer(testF)
        testW.writerow(testFields)

        self.trainWriter = tf.train.SummaryWriter(os.path.join(save, 'train'),
                                                  self.sess.graph)
        self.sess.run(tf.initialize_all_variables())

        nParams = np.sum(v.get_shape().num_elements()
                         for v in tf.trainable_variables())

        meta = {
            'nTrain': nTrain,
            'trainBatchSz': args.trainBatchSz,
            'nParams': nParams,
            'nEpoch': args.nEpoch,
            'nIter': nIter
        }
        metaP = os.path.join(save, 'meta.json')
        with open(metaP, 'w') as f:
            json.dump(meta, f, indent=2)

        self.sess.run(self.makeCvx)

        bestTestF1 = 0.0
        nErrors = 0
        for i in range(nIter):
            tflearn.is_training(True)

            print("=== Iteration {} (Epoch {:.2f}) ===".format(
                i, i / np.ceil(nTrain / args.trainBatchSz)))
            start = time.time()
            I = npr.randint(nTrain, size=args.trainBatchSz)
            xBatch = trainX[I, :]
            yBatch = trainY[I, :]

            def fg(yhats):
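                # Energy/gradient oracle over the current minibatch for
                # bundle-entropy inference.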
                fd = {self.x_: xBatch, self.y_: yhats}
                e, ge = self.sess.run([self.E_, self.dE_dy_], feed_dict=fd)
                return e, ge

            y0 = np.full(yBatch.shape, 0.5)
            try:
                yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(
                    fg, y0, nIter=args.inference_nIter)
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                print("Warning: Exception in bundle_entropy.solveBatch")
                nErrors += 1
                if nErrors > 10:
                    print("More than 10 errors raised, quitting")
                    sys.exit(-1)
                continue

            nActive = [len(Gi) for Gi in G]
            l_yN = crossEntr(yBatch, yN)
            trainF1 = util.macroF1(yBatch, yN)

            fd = self.train_step_fd(args.trainBatchSz, xBatch, yBatch, G, yN,
                                    ys, lam)
            # fd[self.l_yN_] = l_yN
            # fd[self.nBundleIter_] = nIters
            # fd[self.nActive_] = nActive
            summary, _ = self.sess.run([self.merged, self.train_step],
                                       feed_dict=fd)
            if len(self.proj) > 0:
                self.sess.run(self.proj)
            else:
                print("Warning: Not projecting any weights.")
            self.trainWriter.add_summary(summary, i)

            trainW.writerow((i, trainF1, l_yN))
            trainF.flush()

            print(" + trainF1: {:0.2f}".format(trainF1))
            print(" + loss: {:0.5e}".format(l_yN))
            print(" + time: {:0.2f} s".format(time.time() - start))

            if i % np.ceil(nTrain / args.trainBatchSz) == 0:
                print("=== Testing ===")
                tflearn.is_training(False)

                def fg(yhats):
                    fd = {self.x_: valX, self.y_: yhats}
                    e, ge = self.sess.run([self.E_, self.dE_dy_], feed_dict=fd)
                    return e, ge

                y0 = np.full(valY.shape, 0.5)
                yN, G, h, lam, ys, _ = bundle_entropy.solveBatch(
                    fg, y0, nIter=args.inference_nIter)
                testF1 = util.macroF1(valY, yN)
                l_yN = crossEntr(valY, yN)
                print(" + testF1: {:0.4f}".format(testF1))
                testW.writerow((i, testF1, l_yN))
                testF.flush()

                if testF1 > bestTestF1:
                    print('+ Saving best model.')
                    self.save(os.path.join(args.save, 'best.tf'))
                    bestTestF1 = testF1

                os.system('./icnn.plot.py ' + args.save)

        trainF.close()
        testF.close()

        meta['nErrors'] = nErrors
        with open(metaP, 'w') as f:
            json.dump(meta, f, indent=2)

        os.system('./icnn.plot.py ' + args.save)
Example #5
    def train(self, args, trainX, trainY, valX, valY):
        save = args.save

        self.meanY = np.mean(trainY, axis=0)

        nTrain = trainX.shape[0]
        nTest = valX.shape[0]

        nIter = int(np.ceil(args.nEpoch * nTrain / args.trainBatchSz))

        trainFields = ['iter', 'loss']
        trainF = open(os.path.join(save, 'train.csv'), 'w')
        trainW = csv.writer(trainF)
        trainW.writerow(trainFields)
        trainF.flush()

        testFields = ['iter', 'loss']
        testF = open(os.path.join(save, 'test.csv'), 'w')
        testW = csv.writer(testF)
        testW.writerow(testFields)
        testF.flush()

        self.trainWriter = tf.train.SummaryWriter(os.path.join(save, 'train'),
                                                  self.sess.graph)
        self.sess.run(tf.initialize_all_variables())
        if not args.noncvx:
            self.sess.run(self.makeCvx)

        nParams = np.sum(v.get_shape().num_elements()
                         for v in tf.trainable_variables())

        self.nBundleIter = args.nBundleIter
        meta = {
            'nTrain': nTrain,
            'trainBatchSz': args.trainBatchSz,
            'nParams': nParams,
            'nEpoch': args.nEpoch,
            'nIter': nIter,
            'nBundleIter': self.nBundleIter
        }
        metaP = os.path.join(save, 'meta.json')
        with open(metaP, 'w') as f:
            json.dump(meta, f, indent=2)

        nErrors = 0
        maxErrors = 20
        for i in range(nIter):
            tflearn.is_training(True)

            print("=== Iteration {} (Epoch {:.2f}) ===".format(
                i, i / np.ceil(nTrain / args.trainBatchSz)))
            start = time.time()
            I = npr.randint(nTrain, size=args.trainBatchSz)
            xBatch = trainX[I, :]
            yBatch = trainY[I, :]
            yBatch_flat = yBatch.reshape((args.trainBatchSz, -1))

            xBatch_flipped = xBatch[:, :, ::-1, :]

            def fg(yhats):
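                # Oracle for the solver: reshape the flat yhat back to image
                # shape, feed the horizontally flipped frames as the model
                # input, and return E and its gradient w.r.t. the flat y.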
                yhats_shaped = yhats.reshape([args.trainBatchSz] +
                                             self.outputSz)
                fd = {self.x_: xBatch_flipped, self.y_: yhats_shaped}
                e, ge = self.sess.run([self.E_, self.dE_dyFlat_], feed_dict=fd)
                return e, ge

            y0 = np.expand_dims(self.meanY, axis=0).repeat(args.trainBatchSz,
                                                           axis=0)
            y0 = y0.reshape((args.trainBatchSz, -1))
            try:
                yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(
                    fg, y0, nIter=self.nBundleIter)
                yN_shaped = yN.reshape([args.trainBatchSz] + self.outputSz)
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                print("Warning: Exception in bundle_entropy.solveBatch")
                nErrors += 1
                if nErrors > maxErrors:
                    print("More than {} errors raised, quitting".format(
                        maxErrors))
                    sys.exit(-1)
                continue

            nActive = [len(Gi) for Gi in G]
            l_yN = mse(yBatch_flat, yN)

            fd = self.train_step_fd(args.trainBatchSz, xBatch_flipped,
                                    yBatch_flat, G, yN, ys, lam)
            fd[self.l_yN_] = l_yN
            fd[self.nBundleIter_] = nIters
            fd[self.nActive_] = nActive
            summary, _ = self.sess.run([self.merged, self.train_step],
                                       feed_dict=fd)
            if not args.noncvx and len(self.proj) > 0:
                self.sess.run(self.proj)

            saveImgs(xBatch, yN_shaped,
                     "{}/trainImgs/{:05d}".format(args.save, i))

            self.trainWriter.add_summary(summary, i)

            trainW.writerow((i, l_yN))
            trainF.flush()

            print(" + loss: {:0.5e}".format(l_yN))
            print(" + time: {:0.2f} s".format(time.time() - start))

            if i % np.ceil(nTrain / (4.0 * args.trainBatchSz)) == 0:
                os.system('./icnn.plot.py ' + args.save)

            if i % np.ceil(nTrain / args.trainBatchSz) == 0:
                print("=== Testing ===")
                tflearn.is_training(False)

                y0 = np.expand_dims(self.meanY, axis=0).repeat(nTest, axis=0)
                y0 = y0.reshape((nTest, -1))
                valX_flipped = valX[:, :, ::-1, :]

                def fg(yhats):
                    yhats_shaped = yhats.reshape([nTest] + self.outputSz)
                    fd = {self.x_: valX_flipped, self.y_: yhats_shaped}
                    e, ge = self.sess.run([self.E_, self.dE_dyFlat_],
                                          feed_dict=fd)
                    return e, ge

                try:
                    yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(
                        fg, y0, nIter=self.nBundleIter)
                    yN_shaped = yN.reshape([nTest] + self.outputSz)
                except (KeyboardInterrupt, SystemExit):
                    raise
                except:
                    print("Warning: Exception in bundle_entropy.solveBatch")
                    nErrors += 1
                    if nErrors > maxErrors:
                        print("More than {} errors raised, quitting".format(
                            maxErrors))
                        sys.exit(-1)
                    continue

                testMSE = mse(valY, yN_shaped)

                saveImgs(valX, yN_shaped,
                         "{}/testImgs/{:05d}".format(args.save, i))

                print(" + test loss: {:0.5e}".format(testMSE))
                testW.writerow((i, testMSE))
                testF.flush()

                self.save(os.path.join(args.ckptDir, '{:05d}.tf'.format(i)))

                os.system('./icnn.plot.py ' + args.save)

        trainF.close()
        testF.close()

        os.system('./icnn.plot.py ' + args.save)
Example #6
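# Assumed imports, as in Example #3, plus matplotlib as mpl and a
# project-local olivetti data loader.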
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('chkpt', type=str)
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--layerSizes',
                        type=int,
                        nargs='+',
                        default=[600, 600])
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--dataset',
                        type=str,
                        choices=['bibtex', 'bookmarks', 'delicious'],
                        default='bibtex')

    args = parser.parse_args()

    setproctitle.setproctitle('bamos.icnn.ebundle')

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    data = olivetti.load("data/olivetti")
    meanY = np.mean(data['trainY'], axis=0)

    nTrain = data['trainX'].shape[0]
    nTest = data['testX'].shape[0]

    inputSz = list(data['trainX'][0].shape)
    outputSz = list(data['trainY'][0].shape)

    imgDir = os.path.join(args.save, 'imgs')
    if not os.path.exists(imgDir):
        os.makedirs(imgDir)

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = icnn_ebundle.Model(inputSz, outputSz, sess)
        model.load(args.chkpt)

        nSamples = 1

        # Bundle Entropy
        bundleIter, bundleTime, bundleEs = [], [], []

        def fg(yhats):
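            # Oracle for the bundle-entropy solver on the single test image:
            # reshape the flat yhat to image shape and evaluate E and its
            # flat-y gradient.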
            yhats_shaped = yhats.reshape([nSamples] + outputSz)
            fd = {model.x_: xBatch_flipped, model.y_: yhats_shaped}
            e, ge = sess.run([model.E_, model.dE_dyFlat_], feed_dict=fd)
            return e, ge

        def cb(iterNum, es, x):
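            # Save the intermediate reconstruction as an image and record the
            # entropy-adjusted objective at every bundle iteration.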
            yhats_shaped = x.reshape([nSamples] + outputSz)
            plt.imsave(os.path.join(imgDir, '{:05d}.png'.format(iterNum)),
                       yhats_shaped.squeeze(),
                       cmap=mpl.cm.gray)
            bundleIter.append(iterNum)
            es_entr = es - entr(x)
            bundleEs.append(np.mean(es_entr))
            bundleTime.append(time.time() - start)

        start = time.time()
        I = npr.randint(nTrain, size=nSamples)
        # xBatch = data['trainX'][I, :]
        # yBatch = data['trainY'][I, :]
        xBatch = data['testX'][[0], :]
        yBatch = data['testY'][[0], :]
        xBatch_flipped = xBatch[:, :, ::-1, :]
        y0 = np.expand_dims(meanY, axis=0).repeat(nSamples, axis=0)
        y0 = y0.reshape((nSamples, -1))
        yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(fg,
                                                              y0,
                                                              nIter=30,
                                                              callback=cb)
        yN_shaped = yN.reshape([nSamples] + outputSz)

        # PGD
        pgdIter, pgdTime, pgdEs = {}, {}, {}

        def fg(yhats):
            yhats_shaped = yhats.reshape([nSamples] + outputSz)
            fd = {model.x_: xBatch_flipped, model.y_: yhats_shaped}
            e, ge = sess.run([model.E_entr_, model.dE_entr_dyFlat_],
                             feed_dict=fd)
            return e, ge

        def proj(x):
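            # Clip into the open unit box, as in Example #3, so the entropy
            # terms stay finite.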
            return np.clip(x, 1e-6, 1. - 1e-6)

        lrs = [0.1, 0.01, 0.001]
        for lr in lrs:
            pgdIter[lr] = []
            pgdTime[lr] = []
            pgdEs[lr] = []

            def cb(iterNum, es, gs, bestM):
                pgdIter[lr].append(iterNum)
                pgdEs[lr].append(np.mean(es))
                pgdTime[lr].append(time.time() - start)

            start = time.time()
            y0 = np.expand_dims(meanY, axis=0).repeat(nSamples, axis=0)
            y0 = y0.reshape((nSamples, -1))
            bamos_opt.pgd.solve_batch(fg,
                                      proj,
                                      y0,
                                      lr=lr,
                                      rollingDecay=0.5,
                                      eps=1e-3,
                                      minIter=50,
                                      maxIter=50,
                                      callback=cb)

        fig, ax = plt.subplots(1, 1)
        plt.xlabel('Iteration')
        plt.ylabel('Entropy-Scaled Objective')
        for lr in lrs:
            plt.plot(pgdIter[lr], pgdEs[lr], label='PGD, lr={}'.format(lr))
        plt.plot(bundleIter,
                 bundleEs,
                 label='Bundle Entropy',
                 color='k',
                 linestyle='dashed')
        plt.legend()
        # ax.set_yscale('log')
        for ext in ['png', 'pdf']:
            fname = os.path.join(args.save, 'obj.' + ext)
            plt.savefig(fname)
            print("Created {}".format(fname))