def plotAllCombinations(aclasses, avariants, fclasses, fvariants,
                        trials, maxsteps, maxbatchsize=10):
    """Draw a grid of heatmaps covering every (algorithm variant x
    function variant) combination.

    One sample set is generated per function class and shared across all
    of its variants; an empty row/column is left between algorithm and
    function classes respectively.
    """
    sample_cache = {}   # function-class index -> recorded samples
    plot_idx = 1
    rows = sum([len(avariants[ac]) for ac in aclasses]) + len(aclasses) - 1
    cols = len(fvariants) * len(fclasses) + len(fclasses) - 1
    f_mid = int(median(range(len(fvariants))))
    for ac_id, aclass in enumerate(aclasses):
        a_mid = int(median(range(len(avariants[aclass]))))
        for as_id, aparams in enumerate(avariants[aclass]):
            if as_id == 0 and ac_id > 0:
                plot_idx += cols   # blank row between algorithm classes
            for fc_id, fclass in enumerate(fclasses):
                if fc_id not in sample_cache:
                    # shared samples across all uses of one function
                    fwrap = FunctionWrapper(trials, fclass(), record_samples=True)
                    fwrap.nextSamples(maxbatchsize * (maxsteps + 10))
                    sample_cache[fc_id] = fwrap._seen
                data = sample_cache[fc_id]
                for fs_id, fsettings in enumerate(fvariants):
                    if fs_id == 0 and fc_id > 0:
                        plot_idx += 1   # blank column between function classes
                    provider = DataFunctionWrapper(data, fclass(**fsettings),
                                                   shuffling=False)
                    pylab.subplot(rows, cols, plot_idx)
                    plot_idx += 1
                    plotHeatmap(provider, aclass, aparams, trials, maxsteps)
                    # label only the outermost panels of the grid
                    if ac_id == 0 and as_id == 0 and fs_id == f_mid:
                        pylab.title(fclass.__name__[5:])
                    if fs_id == 0 and as_id == a_mid:
                        pylab.ylabel(aclass.__name__[:6])
    pylab.subplots_adjust(left=0.1, bottom=0.01, right=0.99, top=0.9,
                          wspace=0.05, hspace=0.05)
def testWrapper(dim=5): f1 = FunctionWrapper(dim, StochQuad(noiseLevel=0.1)) print f1.currentGradients(ones(dim)) print f1.currentGradients(ones(dim)) print print f1.currentGradients(ones(dim) + 0.1) f1.nextSamples() print print f1.currentGradients(ones(dim)) print
def testPlot4(trials=40, maxsteps=512):
    """Compare vSGD and vSGDfd on one noisy quadratic: overlaid loss
    traces in the bottom half, one heatmap per algorithm in the top row."""
    fun = StochQuad(noiseLevel=100., curvature=1)
    fwrap = FunctionWrapper(trials, fun, record_samples=True)
    fwrap.nextSamples(100000)
    # replay the identical recorded samples for both algorithms
    fwrap = DataFunctionWrapper(fwrap._seen, fun, shuffling=False)
    variants = [(vSGD, {'batch_size': 1}),
                (vSGDfd, {'batch_size': 1})]
    for i, (aclass, aparams) in enumerate(variants):
        pylab.subplot(2, 1, 2)
        fwrap.reset()
        ls = lossTraces(fwrap=fwrap, aclass=aclass, dim=trials,
                        maxsteps=maxsteps, algoparams=aparams)
        plotWithPercentiles(ls, algo_colors[aclass], aclass.__name__)
        pylab.semilogy()
        pylab.xlim(0, maxsteps)
        pylab.legend()
        pylab.subplot(2, 2, i + 1)
        fwrap.reset()
        plotHeatmap(fwrap, aclass, aparams, trials, maxsteps)
    pylab.show()
def testAlgos(dim=3): # generate a dataset f = StochQuad(noiseLevel=0.2) fw = FunctionWrapper(dim, f, record_samples=True) [fw.nextSamples(1) for _ in range(100)] ds = fw._seen dw = DataFunctionWrapper(ds, f, shuffling=False) x0 = ones(dim) for algoclass in [SGD, SGD, OracleSGD, Almeida, Amari, RMSProp, AdaGrad, MomentumSGD, AveragingSGD]: dw.reset() print algoclass.__name__ algo = algoclass(dw, x0, callback=printy) algo.run(16)
def _runsome():
    """Sweep vSGD over the first two function classes/settings, recording
    the loss at step 10 for 50 parallel trials each.

    Fix: removed a dead ``fwrap = FunctionWrapper(trials, StochQuad(...))``
    assignment that was unconditionally overwritten inside the innermost
    loop before ever being read.
    """
    trials = 50
    maxsteps = 2000
    for aclass in [vSGD]:
        for aparams in algo_variants[aclass]:
            for fclass in fun_classes[:2]:
                for fsettings in fun_settings[:2]:
                    fwrap = FunctionWrapper(trials, fclass(**fsettings))
                    lossTraces(fwrap, aclass, algoparams=aparams,
                               dim=trials, maxsteps=maxsteps, storesteps=[10])
def testPlot3(trials=100, maxsteps=2**10):
    """One heatmap per algorithm variant on a single noisy quadratic,
    laid out on a roughly square grid."""
    fwrap = FunctionWrapper(trials, StochQuad(noiseLevel=1, curvature=1))
    variants = [(SGD, {'learning_rate': 0.1}),
                (SGD, {'learning_rate': 0.01}),
                (AdaGrad, {'init_lr': 0.3}),
                (Amari, {'init_lr': 0.1, 'time_const': 100}),
                (RMSProp, {'init_lr': 0.1}),
                (OracleSGD, {}),
                ]
    # choose a near-square grid (ratio kept for easy tweaking)
    ratio = 1
    tot = len(variants)
    rows = int(pylab.sqrt(tot) / ratio)
    cols = (tot + rows - 1) // rows   # ceiling division
    for plot_idx, (aclass, aparams) in enumerate(variants, start=1):
        pylab.subplot(rows, cols, plot_idx)
        plotHeatmap(fwrap, aclass, aparams, trials, maxsteps)
        pylab.title(aclass.__name__)
    pylab.show()
def testMinibatch(dim=4): f = StochQuad(noiseLevel=0.2) fw = FunctionWrapper(dim, f, record_samples=True) x0 = ones(dim) for mb in [1, 3, 15, 250]: print "minibatch", mb algo = SGD(fw, x0, callback=printy, batch_size=mb, learning_rate=0.1) algo.run(10) print [fw.nextSamples(1) for _ in range(2500)] dw = DataFunctionWrapper(fw._seen, f, shuffling=False) print "Fixed samples" for mb in [1, 3, 15, 250]: print "minibatch", mb dw.reset() algo = SGD(dw, x0, callback=printy, batch_size=mb, learning_rate=0.1) algo.run(10) print
def testMinibatch(dim=4): f = StochQuad(noiseLevel=0.2) fw = FunctionWrapper(dim, f, record_samples=True) x0 = ones(dim) for mb in [1,3,15,250]: print 'minibatch', mb algo = SGD(fw, x0, callback=printy, batch_size=mb, learning_rate=0.1) algo.run(10) print [fw.nextSamples(1) for _ in range(2500)] dw = DataFunctionWrapper(fw._seen, f, shuffling=False) print 'Fixed samples' for mb in [1,3,15,250]: print 'minibatch', mb dw.reset() algo = SGD(dw, x0, callback=printy, batch_size=mb, learning_rate=0.1) algo.run(10) print
def plotAllCombinations(aclasses, avariants, fclasses, fvariants,
                        trials, maxsteps, maxbatchsize=10):
    """Heatmap grid over every algorithm-variant/function-variant pair.

    NOTE(review): this redefines ``plotAllCombinations`` from earlier in
    the file; this later definition is the one that takes effect.
    """
    fundic = {}
    ploti = 1
    # grid size, including one spacer row/column between classes
    rows = sum([len(avariants[ac]) for ac in aclasses]) + len(aclasses) - 1
    cols = len(fvariants) * len(fclasses) + len(fclasses) - 1
    f_mid = int(median(range(len(fvariants))))
    for ac_id, aclass in enumerate(aclasses):
        a_mid = int(median(range(len(avariants[aclass]))))
        for as_id, aparams in enumerate(avariants[aclass]):
            if as_id == 0 and ac_id > 0:
                # spacer row before each new algorithm class
                ploti += cols
            for fc_id, fclass in enumerate(fclasses):
                if fc_id not in fundic:
                    # shared samples across all uses of one function
                    fun = fclass()
                    fwrap = FunctionWrapper(trials, fun, record_samples=True)
                    fwrap.nextSamples(maxbatchsize * (maxsteps + 10))
                    fundic[fc_id] = fwrap._seen
                data = fundic[fc_id]
                for fs_id, fsettings in enumerate(fvariants):
                    if fs_id == 0 and fc_id > 0:
                        # spacer column before each new function class
                        ploti += 1
                    fun = fclass(**fsettings)
                    provider = DataFunctionWrapper(data, fun, shuffling=False)
                    pylab.subplot(rows, cols, ploti)
                    ploti += 1
                    plotHeatmap(provider, aclass, aparams, trials, maxsteps)
                    if ac_id == 0 and as_id == 0 and fs_id == f_mid:
                        pylab.title(fclass.__name__[5:])
                    if fs_id == 0 and as_id == a_mid:
                        pylab.ylabel(aclass.__name__[:6])
    pylab.subplots_adjust(left=0.1, bottom=0.01, right=0.99, top=0.9,
                          wspace=0.05, hspace=0.05)
def testPlot1(trials=20):
    """Plot SGD loss traces (dotted) plus their mean (solid red) on a
    log scale."""
    fwrap = FunctionWrapper(trials, StochQuad(noiseLevel=0.2))
    losses = lossTraces(fwrap=fwrap, aclass=SGD, dim=trials, maxsteps=100,
                        algoparams={'learning_rate': 0.2})
    pylab.plot(losses, 'b:')
    pylab.plot(pylab.mean(losses, axis=1), 'r-')
    pylab.semilogy()
    pylab.show()
def testPlot4(trials=40, maxsteps=512):
    """vSGD vs. vSGDfd on one very noisy quadratic: shared loss-trace
    panel below, per-algorithm heatmaps above.

    NOTE(review): duplicate of an earlier ``testPlot4`` definition in
    this file; this one takes effect.
    """
    fun = StochQuad(noiseLevel=100., curvature=1)
    recorder = FunctionWrapper(trials, fun, record_samples=True)
    recorder.nextSamples(100000)
    # both algorithms consume the identical recorded sample stream
    fwrap = DataFunctionWrapper(recorder._seen, fun, shuffling=False)
    for i, (aclass, aparams) in enumerate([(vSGD, {'batch_size': 1}),
                                           (vSGDfd, {'batch_size': 1})]):
        pylab.subplot(2, 1, 2)
        fwrap.reset()
        losses = lossTraces(fwrap=fwrap, aclass=aclass, dim=trials,
                            maxsteps=maxsteps, algoparams=aparams)
        plotWithPercentiles(losses, algo_colors[aclass], aclass.__name__)
        pylab.semilogy()
        pylab.xlim(0, maxsteps)
        pylab.legend()
        pylab.subplot(2, 2, i + 1)
        fwrap.reset()
        plotHeatmap(fwrap, aclass, aparams, trials, maxsteps)
    pylab.show()
def testPlot2(trials=51, maxsteps=5000):
    """Loss traces with percentile bands for a set of averaging/oracle
    algorithms on a quadratic whose optimum jumps at step 1000."""
    fwrap = FunctionWrapper(
        trials,
        OptimumJumper(StochQuad(noiseLevel=10, curvature=1),
                      jumptime=1000, jumpdist_std=1))
    # Commented entries below are alternative variants kept for quick toggling:
    #   (SGD, {'learning_rate': 0.1}), (SGD, {'learning_rate': 0.01}),
    #   (AveragingSGD, {'learning_rate': 0.01}),
    #   (AveragingSGD, {'learning_rate': 0.01, 'fixedDecay': 0.1}),
    #   (AveragingSGD, {'learning_rate': 0.1}),
    #   (AveragingSGD, {'learning_rate': 1.0}),
    #   (AveragingOracle, {'fixedDecay': 0.01}),
    #   (AdaGrad, {'init_lr': 0.3}),
    #   (Amari, {'init_lr': 0.1, 'time_const': 100}),
    #   (RMSProp, {'init_lr': 0.1}),
    #   (vSGD, {'verbose': False}), (vSGDfd, {}),
    variants = [(AveragingOracle, {}),
                (AveragingOracle, {"fixedDecay": 0.1}),
                (AdaptivelyAveragingOracle, {}),
                (OracleSGD, {}),
                ]
    for aclass, aparams in variants:
        losses = lossTraces(fwrap=fwrap, aclass=aclass, dim=trials,
                            maxsteps=maxsteps, algoparams=aparams)
        plotWithPercentiles(losses, algo_colors[aclass], aclass.__name__)
    pylab.semilogy()
    pylab.xlim(0, maxsteps)
    pylab.legend()
    pylab.show()
def testOracle(dim=3): f = FunctionWrapper(dim, StochQuad(noiseLevel=0.1)) x0 = ones(dim) algo = OracleSGD(f, x0, callback=printy, loss_target=1e-5) algo.run(100) print mean(algo.parameters **2)
def testSGD(dim=3):
    """Run plain SGD for up to 100 steps (or until the loss target is hit),
    printing progress via the callback."""
    fwrap = FunctionWrapper(dim, StochQuad(noiseLevel=0.2))
    start = ones(dim)
    algo = SGD(fwrap, start, callback=printy, learning_rate=0.2,
               loss_target=0.01)
    algo.run(100)
    print