def plot():
    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('font', **{'family': 'serif'})
    with open('results.pkl', 'rb') as f:  # binary mode for pickle
        parser, parsed_avg_hypergrad = pickle.load(f)
    #rc('text', usetex=True)

    # ----- Small versions of stepsize schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)

    def layer_name(weight_key):
        return "Layer {num}".format(num=weight_key[1] + 1)

    for cur_results, name in zip(parsed_avg_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=layer_name(name))
    low, high = ax.get_ylim()
    #ax.set_ylim([0, high])
    ax.set_ylabel('Learning rate gradient')
    ax.set_xlabel('Schedule index')
    ax.set_yticks([0])
    ax.set_yticklabels(['0'])
    fig.set_size_inches((6, 2.5))
    #ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('schedules_small.pdf', pad_inches=0.05, bbox_inches='tight')

    # ----- Alpha and beta initial hypergradients -----
    # nice_layer_name is assumed to be defined at module level (see the sketch
    # after this function).
    print "Plotting initial gradients..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(parsed_avg_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family': 'serif', 'size': '12'})
    ax = fig.add_subplot(412)
    for cur_results, name in zip(parsed_avg_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning iteration', fontproperties='serif')
    ax.set_ylabel('Momentum gradient', fontproperties='serif')
    ax = fig.add_subplot(413)
    for cur_results, name in zip(parsed_avg_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family': 'serif', 'size': '12'})
    ax = fig.add_subplot(414)
    for cur_results, name in zip(parsed_avg_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning iteration', fontproperties='serif')
    ax.set_ylabel('Momentum gradient', fontproperties='serif')
    fig.set_size_inches((6, 8))
    #plt.show()
    plt.savefig('initial_gradient.png')
    plt.savefig('initial_gradient.pdf', pad_inches=0.05, bbox_inches='tight')
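# --- Hedged sketch of module-level names that the plot() routines in this
# file call but do not define: pickle, np, nice_layer_name, logit, and
# layer_sizes. These are assumptions about the surrounding code, not the
# authors' exact definitions.
import pickle

import numpy as np

def nice_layer_name(weight_key):
    """Map a parser name tuple like ('weights', 1) to 'Layer 2 weights'.
    Assumed to mirror the local layer_name() helpers defined inside plot()."""
    return "Layer {num} {name}".format(num=weight_key[1] + 1, name=weight_key[0])

def logit(x):
    """Logistic sigmoid, mapping unconstrained invlogit_betas back into (0, 1).
    The name 'logit' for this mapping is assumed from the surrounding codebase;
    the ax.set_ylim([0, 1]) calls in the momentum panels below are consistent
    with this reading."""
    return 1.0 / (1.0 + np.exp(-x))

# Hypothetical architecture placeholder (input layer first); the real
# experiment script is assumed to define layer_sizes before plot() is called.
layer_sizes = [784, 50, 50, 50, 10]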
def plot():
    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('font', **{'family': 'serif'})
    with open('results.pkl', 'rb') as f:  # binary mode for pickle
        results, parser, parsed_init_hypergrad = pickle.load(f)
    #rc('text', usetex=True)

    # ----- Small versions of stepsize schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)

    def layer_name(weight_key):
        return "Layer {num}".format(num=weight_key[1] + 1)

    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Learning rate')
    ax.set_xlabel('Schedule index')
    fig.set_size_inches((6, 2.5))
    ax.legend(numpoints=1, loc=1, frameon=False, prop={'size': '12'})
    plt.savefig('schedules_small.pdf', pad_inches=0.05, bbox_inches='tight')

    # ----- Alpha and beta initial hypergradients -----
    print "Plotting initial gradients..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family': 'serif', 'size': '12'})
    ax = fig.add_subplot(412)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning iteration', fontproperties='serif')
    ax.set_ylabel('Momentum gradient', fontproperties='serif')
    ax = fig.add_subplot(413)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family': 'serif', 'size': '12'})
    ax = fig.add_subplot(414)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning iteration', fontproperties='serif')
    ax.set_ylabel('Momentum gradient', fontproperties='serif')
    fig.set_size_inches((6, 8))
    #plt.show()
    plt.savefig('initial_gradient.png')
    plt.savefig('initial_gradient.pdf', pad_inches=0.05, bbox_inches='tight')

    # ----- Nice versions of alpha and beta schedules for paper -----
    print "Plotting full alpha and beta schedule curves..."
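    # Parameterization note (assumed from the variable names and the plotting
    # transforms): step sizes are optimized as log_alphas and momentum decays
    # as invlogit_betas, i.e. in unconstrained spaces. The panels below map
    # them back to their natural ranges with np.exp (positive step sizes) and
    # logit (momenta in (0, 1)).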
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family': 'serif', 'size': '12'})
    ax = fig.add_subplot(412)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    ax = fig.add_subplot(413)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family': 'serif', 'size': '12'})
    ax = fig.add_subplot(414)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    fig.set_size_inches((6, 8))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    print "Plotting learning curves..."
    fig.clf()
    fig.set_size_inches((6, 8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude and angle')  # both curves are plotted
    ax.plot(results['meta_grad_magnitude'], 'o-', label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient magnitude')
    ax.legend(loc=1, frameon=False)
    plt.savefig('learning_curves.png')

    # ----- Learning curve info -----
    print "Plotting extra learning curves..."
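    # These per-epoch diagnostics (gradient, weight, and velocity norms) are
    # assumed to be recorded by the inner optimizer during each
    # meta-iteration's training run; they help spot exploding or vanishing
    # updates that the loss curves above can hide.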
    fig.clf()
    ax = fig.add_subplot(311)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')
    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')
    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Init scale and L2 reg -----
    print "Plotting initialization distributions and regularization..."
    fig.clf()
    ax = fig.add_subplot(111)
    #ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'weights':
            ax.plot(np.exp(y), 'o-', label=layer_name(parser.names[i]))
    # Show dashed lines at the theoretical optimum, 1/sqrt(fan-in), for the
    # first two layers (see the sketch after this function).
    y1 = 1.0 / np.sqrt(layer_sizes[0])
    y2 = 1.0 / np.sqrt(layer_sizes[1])
    ax.plot(ax.get_xlim(), (y1, y1), 'b--')
    ax.plot(ax.get_xlim(), (y2, y2), 'k--')
    ax.set_xlabel('Meta iteration')
    ax.set_ylabel('Initial scale')
    #ax.set_yscale('log')
    #ax.legend(loc=1, frameon=False)
    fig.set_size_inches((2.5, 2.5))
    #ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('init_weight_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    ax = fig.add_subplot(111)
    #ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'biases':
            ax.plot(np.exp(y), 'o-', label=layer_name(parser.names[i]))
    ax.set_xlabel('Meta iteration')
    #ax.set_ylabel('Scale')
    #ax.set_yscale('log')
    #ax.set_ylabel('Log param scale')
    fig.set_size_inches((2.5, 2.5))
    ax.legend(numpoints=1, loc=0, frameon=False, prop={'size': '10'})
    plt.savefig('init_bias_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')
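# --- Hedged sketch: why 1/sqrt(fan-in) is the "theoretical optimum" initial
# scale marked by the dashed lines above. For a unit with n inputs of unit
# variance and i.i.d. weights of standard deviation s, the preactivation
# variance is n * s**2, so s = 1/sqrt(n) keeps it near 1. A quick numerical
# check (function name and arguments are illustrative, not from the
# experiment code):
def check_init_scale(n_inputs=784, n_samples=10000):
    rng = np.random.RandomState(0)
    x = rng.randn(n_samples, n_inputs)           # unit-variance inputs
    w = rng.randn(n_inputs) / np.sqrt(n_inputs)  # weights at scale 1/sqrt(fan-in)
    preactivations = np.dot(x, w)
    # Should print a value close to 1.0.
    print "Preactivation variance: {0:.3f}".format(preactivations.var())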