Example #1
    def hyperloss_grad(hyperparam_vect, i):
        learning_curve = []
        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size, 
                                     np.exp(cur_hyperparams['log_param_scale'][i]))
                             for i, name in enumerate(parser.names)]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        results = sgd3(indexed_loss_fun, valid_loss_fun, W0, V0,
                       alphas, betas, log_L2_reg, callback=callback)
        hypergrads = hyperparams.copy()
        hypergrads['log_L2_reg']      = results['dMd_meta']
        weights_grad = parser.new_vect(W0 * results['dMd_x'])
        hypergrads['log_param_scale'] = [np.sum(weights_grad[name])
                                         for name in parser.names]
        hypergrads['log_alphas']      = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas']  = (results['dMd_betas'] *
                                         d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        return hypergrads.vect
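
Later examples call a `fill_parser` helper instead of building `layer_param_scale` by hand as above. A minimal sketch consistent with that usage (an assumption about the helper, inferred from how `parser.names` and per-name sizes are used here) is:

import numpy as np

def fill_parser(parser, items):
    # One scalar per named parameter block, broadcast to a flat vector the
    # same length as parser.vect (mirrors the layer_param_scale construction).
    return np.concatenate([np.full(parser[name].size, items[i])
                           for i, name in enumerate(parser.names)], axis=0)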
Example #2
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, meta_vect, i_iter):
            (train_data, train_labels, L2_vect) = meta
            return loss_fun(w, train_data, train_labels, L2_vect)
            #return loss_fun(w, train_data['X'], train_data['T'], L2_vect + np.sum(fake_data.ravel()))

        learning_curve_dict = defaultdict(list)
        def callback(x, v, g, i_iter):
            if i_iter % thin == 0:
        #        learning_curve_dict['learning_curve'].append(loss_fun(x, getval(cur_hyperparams['fake_data']), fake_labels))
                learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
                learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
                learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))


        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        fake_data = cur_hyperparams['fake_data']
        rs = RandomState((seed, i_hyper))
        W0 = fill_parser(parser, np.exp(fixed_hyperparams['log_param_scale']))
        W0 *= rs.randn(W0.size)
        alphas = np.exp(fixed_hyperparams['log_alphas'])
        betas  = logit(fixed_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(fixed_hyperparams['log_L2_reg']))
        meta = kylist(fake_data, fake_labels, L2_reg)
        W_opt = sgd_parsed(grad(indexed_loss_fun), kylist(W0, alphas, betas, meta),
                           parser, callback=callback)
        cur_primal_results['weights'] = getval(W_opt).copy()
        cur_primal_results['learning_curve'] = getval(learning_curve_dict)
        return W_opt, learning_curve_dict
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, L2_vect, i_iter):
            rs = RandomState((seed, i_hyper, i_iter))  # Deterministic seed needed for backwards pass.
            idxs = rs.randint(N_train, size=batch_size)
            return loss_fun(w, train_data["X"][idxs], train_data["T"][idxs], L2_vect)

        learning_curve_dict = defaultdict(list)

        def callback(x, v, g, i_iter):
            if i_iter % thin == 0:
                learning_curve_dict["learning_curve"].append(loss_fun(x, **train_data))
                learning_curve_dict["grad_norm"].append(np.linalg.norm(g))
                learning_curve_dict["weight_norm"].append(np.linalg.norm(x))
                learning_curve_dict["velocity_norm"].append(np.linalg.norm(v))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        rs = RandomState((seed, i_hyper))
        W0 = fill_parser(parser, np.exp(cur_hyperparams["log_param_scale"]))
        W0 *= rs.randn(W0.size)
        alphas = np.exp(cur_hyperparams["log_alphas"])
        betas = logit(cur_hyperparams["invlogit_betas"])
        L2_reg = fill_parser(parser, np.exp(fixed_hyperparams["log_L2_reg"]))
        W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
        # callback(W_opt, N_iters)
        return W_opt, learning_curve_dict
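
These optimizers seed `RandomState` with a tuple such as `(seed, i_hyper, i_iter)` so the same minibatch indices can be regenerated deterministically on the backwards pass. A minimal sketch of such a wrapper (an assumption about the helper, not its verified implementation) is:

import numpy.random as npr

class RandomState(npr.RandomState):
    # Accept a tuple seed by hashing it down to a 32-bit integer, giving a
    # deterministic stream per (seed, i_hyper, i_iter) combination.
    def __init__(self, seed_tuple):
        npr.RandomState.__init__(self, abs(hash(seed_tuple)) % (2 ** 32))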
Example #4
    def hyperloss_grad(hyperparam_vect, ii):
        learning_curve = []
        params_curve = []
        def callback(x, i):
            params_curve.append(x)
            learning_curve.append(loss_fun(x))

        def indexed_loss_fun(w, log_L2_reg, j):
            return loss_fun(w)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = np.ones(N_weights) * init_param_scale
        V0 = cur_hyperparams['V0']
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = 0.0
        results = sgd3(indexed_loss_fun, loss_fun, W0, V0,
                       alphas, betas, log_L2_reg, callback=callback)
        hypergrads = hyperparams.copy()
        hypergrads['V0']              = results['dMd_v']
        hypergrads['log_alphas']      = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas']  = (results['dMd_betas'] *
                                         d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        all_param_curves.append(params_curve)
        return hypergrads.vect
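
A function like `hyperloss_grad` above returns the flat hypergradient vector, so it can drive a plain gradient step on the hyperparameters. A hypothetical driver loop (the meta step size and iteration count are assumptions, not taken from the source) might look like:

meta_stepsize = 0.1   # assumed value
hyper_vect = hyperparams.vect.copy()
for meta_iter in range(10):
    # Descend the hypergradient returned above.
    hyper_vect = hyper_vect - meta_stepsize * hyperloss_grad(hyper_vect, meta_iter)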
Example #5
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, L2_vect, i_iter):
            rs = RandomState((seed, i_hyper, i_iter))  # Deterministic seed needed for backwards pass.
            idxs = rs.randint(N_train, size=batch_size)
            return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

        learning_curve_dict = defaultdict(list)
        def callback(x, v, g, i_iter):
            if i_iter % thin == 0 or i_iter == N_iters or i_iter == 0:
                learning_curve_dict['learning_curve'].append(loss_fun(x, **train_data))
                learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
                learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
                learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))
                learning_curve_dict['iteration'].append(i_iter + 1)
                print "iteration", i_iter

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        rs = RandomState((seed, i_hyper))
        W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
        W0 *= rs.randn(W0.size)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas  = logit(cur_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(fixed_hyperparams['log_L2_reg']))
        W_opt = sgd_parsed(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg),
                           parser, callback=callback)
        return W_opt, learning_curve_dict
    def primal_optimizer(hyperparam_vect, i_hyper):
        learning_curve = []
        def callback(x, i_iter):
            learning_curve.append(loss_fun(x))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
        W0 *= npr.RandomState(hash(i_hyper)).randn(W0.size)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas  = logit(cur_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
        W_opt = sgd4(grad(loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
        callback(W_opt, N_iters)
        return W_opt, learning_curve
Example #7
def run():
    N_iters = N_epochs
    parser, loss_fun = make_toy_funs()
    N_weights = parser.vect.size
    hyperparams = VectorParser()
    hyperparams["log_alphas"] = np.full(N_iters, init_log_alphas)
    hyperparams["invlogit_betas"] = np.full(N_iters, init_invlogit_betas)
    hyperparams["V0"] = np.full(N_weights, init_V0)

    forward_path = []
    forward_learning_curve = []

    def fwd_callback(x, i):
        print type(x[0])
        forward_path.append(x.copy())
        forward_learning_curve.append(loss_fun(x))

    reverse_path = []
    reverse_learning_curve = []

    def reverse_callback(x, i):
        reverse_path.append(x.copy())
        reverse_learning_curve.append(loss_fun(x))

    def indexed_loss_fun(w, log_L2_reg, j):
        return loss_fun(w)

    cur_hyperparams = hyperparams
    W0 = init_params
    V0 = cur_hyperparams["V0"]
    alphas = np.exp(cur_hyperparams["log_alphas"])
    betas = logit(cur_hyperparams["invlogit_betas"])
    log_L2_reg = 0.0
    sgd3_naive(
        indexed_loss_fun,
        W0,
        V0,
        alphas,
        betas,
        log_L2_reg,
        fwd_callback=fwd_callback,
        reverse_callback=reverse_callback,
    )

    return forward_path, forward_learning_curve, reverse_path, reverse_learning_curve
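
`run()` returns both the forward trajectory and the one reconstructed by the reverse pass, so a quick reversibility check is to overlay the two learning curves. A hypothetical driver (the output file name and the flipped ordering of the reverse curve are assumptions) could be:

import matplotlib.pyplot as plt

fwd_path, fwd_curve, rev_path, rev_curve = run()
plt.plot(fwd_curve, label='forward pass')
plt.plot(rev_curve[::-1], label='reverse pass (reversed order)')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.legend()
plt.savefig('reversibility_check.png')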
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, L2_vect, i_iter):
            seed = i_hyper * 10**6 + i_iter
            idxs = npr.RandomState(seed).randint(N_train, size=batch_size)
            return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

        learning_curve = []
        def callback(x, v, g, i_iter):
            if i_iter % N_batches == 0:
                learning_curve.append(loss_fun(x, **train_data))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
        W0 *= npr.RandomState(i_hyper).randn(W0.size)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas  = logit(cur_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
        V0 = np.zeros(W0.size)
        W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
        return W_opt, learning_curve
    def hyperloss(hyperparam_vect, i):
        learning_curve = []
        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size, 
                                     np.exp(cur_hyperparams['log_param_scale'][i]))
                             for i, name in enumerate(parser.names)]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas     = np.exp(cur_hyperparams['log_alphas'])
        betas      =  logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg =        cur_hyperparams['log_L2_reg']
        W_opt = sgd5(grad(indexed_loss_fun), kylist(W0, alphas, betas, log_L2_reg), callback)
        all_x.append(getval(W_opt))
        all_learning_curves.append(learning_curve)
        return valid_loss_fun(W_opt)
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, L2_vect, i_iter):
            rs = npr.RandomState(npr.RandomState(global_seed + i_hyper).randint(1000))
            seed = i_hyper * 10**6 + i_iter   # Deterministic seed needed for backwards pass.
            idxs = rs.randint(N_train, size=batch_size)
            return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

        learning_curve = []
        def callback(x, i_iter):
            if i_iter % N_batches == 0:
                learning_curve.append(loss_fun(x, **train_data))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
        W0 *= npr.RandomState(global_seed + i_hyper).randn(W0.size)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas  = logit(cur_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
        W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
        callback(W_opt, N_iters)
        return W_opt, learning_curve
Example #11
def plot():

    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('font',**{'family':'serif'})

    with open('results.pkl') as f:
        results, parser, parsed_init_hypergrad = pickle.load(f)

    #rc('text', usetex=True)

    # ----- Small versions of stepsize schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    def layer_name(weight_key):
        return "Layer {num}".format(num=weight_key[1] + 1)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Learning rate')
    ax.set_xlabel('Schedule index')
    fig.set_size_inches((6,2.5))
    ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('schedules_small.pdf', pad_inches=0.05, bbox_inches='tight')


    # ----- Alpha and beta initial hypergradients -----
    print "Plotting initial gradients..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(412)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')

    ax = fig.add_subplot(413)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(414)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')

    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('initial_gradient.png')
    plt.savefig('initial_gradient.pdf', pad_inches=0.05, bbox_inches='tight')


    # ----- Nice versions of Alpha and beta schedules for paper -----
    print "Plotting full alpha and beta schedules curves..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(412)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')

    ax = fig.add_subplot(413)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(414)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')



    print "Plotting learning curves..."
    fig.clf()
    fig.set_size_inches((6,8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude')
    ax.plot(results['meta_grad_magnitude'], 'o-', label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)

    plt.savefig('learning_curves.png')

    # ----- Learning curve info -----
    print "Plotting extra learning curves..."
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')

    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')

    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Init scale and L2 reg -----
    print "Plotting initialization distributions and regularization..."
    fig.clf()
    ax = fig.add_subplot(111)
    #ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'weights':
            ax.plot(np.exp(y), 'o-', label=layer_name(parser.names[i]))
    # Show lines for theoretical optimum.
    y1 = 1.0/np.sqrt(layer_sizes[0])
    y2 = 1.0/np.sqrt(layer_sizes[1])
    ax.plot(ax.get_xlim(), (y1, y1), 'b--')
    ax.plot(ax.get_xlim(), (y2, y2), 'k--')
    ax.set_xlabel('Meta iteration')
    ax.set_ylabel('Initial scale')
    #ax.set_yscale('log')
    #ax.legend(loc=1, frameon=False)

    fig.set_size_inches((2.5,2.5))
    #ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('init_weight_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    ax = fig.add_subplot(111)
    #ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'biases':
            ax.plot(np.exp(y), 'o-', label=layer_name(parser.names[i]))
    ax.set_xlabel('Meta iteration')
    #ax.set_ylabel('Scale')
    #ax.set_yscale('log')
    #ax.set_ylabel('Log param scale')
    fig.set_size_inches((2.5,2.5))
    ax.legend(numpoints=1, loc=0, frameon=False, prop={'size':'10'})
    plt.savefig('init_bias_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')
def plot():

    import matplotlib.pyplot as plt

    with open("results.pkl") as f:
        results, parser = pickle.load(f)

    # ----- Nice versions of Alpha and beta schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(211)
    # ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results["log_alphas"][-1].T, parser.names):
        ax.plot(np.exp(cur_results), "o-", label=name)
    # ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel("Step size", fontproperties="serif")
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5), prop={"family": "serif", "size": "12"})

    ax = fig.add_subplot(212)
    # ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results["invlogit_betas"][-1].T, parser.names):
        ax.plot(logit(cur_results), "o-", label=name)
    low, high = ax.get_ylim()
    ax.set_ylim([0, 1])
    ax.set_xlabel("Learning Iteration", fontproperties="serif")
    ax.set_ylabel("Momentum", fontproperties="serif")

    fig.set_size_inches((6, 3))
    # plt.show()
    plt.savefig("alpha_beta_paper.png")
    plt.savefig("alpha_beta_paper.pdf", pad_inches=0.05, bbox_inches="tight")

    fig.clf()
    fig.set_size_inches((6, 8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title("Primal learning curves")
    for i, y in enumerate(results["learning_curves"]):
        ax.plot(y["learning_curve"], "o-", label="Meta iter {0}".format(i))
    ax.set_xlabel("Epoch number")
    ax.set_ylabel("Negative log prob")
    # ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(312)
    ax.set_title("Meta learning curves")
    losses = ["train_loss", "valid_loss", "tests_loss"]
    for loss_type in losses:
        ax.plot(results[loss_type], "o-", label=loss_type)
    ax.set_xlabel("Meta iter number")
    ax.set_ylabel("Negative log prob")
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(313)
    ax.set_title("Meta-gradient magnitude")
    ax.plot(results["meta_grad_magnitude"], "o-", label="Meta-gradient magnitude")
    ax.plot(results["meta_grad_angle"], "o-", label="Meta-gradient angle")
    ax.set_xlabel("Meta iter number")
    ax.set_ylabel("Meta-gradient Magnitude")
    ax.legend(loc=1, frameon=False)

    plt.savefig("learning_curves.png")

    # ----- Learning curve info -----
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title("Primal learning curves")
    for i, y in enumerate(results["learning_curves"]):
        ax.plot(y["grad_norm"], "o-", label="Meta iter {0}".format(i))
    ax.set_xlabel("Epoch number")
    # ax.legend(loc=1, frameon=False)
    ax.set_title("Grad norm")

    ax = fig.add_subplot(312)
    for i, y in enumerate(results["learning_curves"]):
        ax.plot(y["weight_norm"], "o-", label="Meta iter {0}".format(i))
    ax.set_xlabel("Epoch number")
    ax.legend(loc=1, frameon=False)
    ax.set_title("Weight norm")

    ax = fig.add_subplot(313)
    for i, y in enumerate(results["learning_curves"]):
        ax.plot(y["velocity_norm"], "o-", label="Meta iter {0}".format(i))
    ax.set_xlabel("Epoch number")
    ax.set_title("Velocity norm")
    ax.legend(loc=1, frameon=False)
    plt.savefig("extra_learning_curves.png")

    # ----- Alpha and beta schedules -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title("Alpha learning curves")
    for i, y in enumerate(results["log_alphas"]):
        ax.plot(y, "o-", label="Meta iter {0}".format(i))
    ax.set_xlabel("Primal iter number")
    # ax.set_ylabel('Log alpha')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(212)
    ax.set_title("Beta learning curves")
    for y in results["invlogit_betas"]:
        ax.plot(y, "o-")
    ax.set_xlabel("Primal iter number")
    ax.set_ylabel("Inv logit beta")
    plt.savefig("alpha_beta_curves.png")

    # ----- Init scale and L2 reg -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title("Init scale learning curves")
    for i, y in enumerate(zip(*results["log_param_scale"])):
        ax.plot(y, "o-", label=parser.names[i])
    ax.set_xlabel("Meta iter number")
    ax.set_ylabel("Log param scale")
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(212)
    ax.set_title("L2 reg learning curves")
    for i, y in enumerate(zip(*results["log_L2_reg"])):
        ax.plot(y, "o-", label=parser.names[i])
    ax.set_xlabel("Meta iter number")
    ax.set_ylabel("Log L2 reg")
    ax.legend(loc=1, frameon=False)
    plt.savefig("scale_and_reg.png")
Example #13
def test_inv_logit():
    assert np.allclose(inv_logit(logit(0.5)), 0.5, rtol=1e-3, atol=1e-4)
    assert np.allclose(inv_logit(logit(0.6)), 0.6, rtol=1e-3, atol=1e-4)
    assert np.allclose(inv_logit(logit(0.1)), 0.1, rtol=1e-3, atol=1e-4)
    assert np.allclose(inv_logit(logit(0.2)), 0.2, rtol=1e-3, atol=1e-4)
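
These tests, together with `test_logit` in Example #17, pin down the conventions used throughout this code: `logit` here behaves like the standard sigmoid, and `inv_logit` is its inverse. A sketch of definitions consistent with the tests (assumed, since the utility module is not shown) is:

import numpy as np

def logit(x):
    # Sigmoid: logit(0) == 0.5, logit(-100) ~= 0, logit(100) ~= 1.
    return 1.0 / (1.0 + np.exp(-x))

def inv_logit(p):
    # Inverse of the sigmoid above, so inv_logit(logit(p)) == p.
    return np.log(p) - np.log(1.0 - p)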
Example #14
def plot():
    import matplotlib.pyplot as plt
    with open('results.pkl') as f:
        results, parser = pickle.load(f)

    # ----- Nice versions of Alpha and beta schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(211)
    #ax.set_title('Alpha learning curves')
    ax.plot(np.exp(results['log_alphas'][-1]), 'o-', label="Step size")
    #ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])

    ax = fig.add_subplot(212)
    #ax.set_title('Alpha learning curves')
    ax.plot(logit(results['invlogit_betas'][-1]), 'go-', label="Momentum")
    low, high = ax.get_ylim()
    ax.set_ylim([low, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')

    #ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
    #          prop={'family':'serif', 'size':'12'})
    fig.set_size_inches((6, 3))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    fig.set_size_inches((6, 8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude')
    ax.plot(results['meta_grad_magnitude'],
            'o-',
            label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)

    plt.savefig('learning_curves.png')

    # ----- Learning curve info -----
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')

    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')

    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Alpha and beta schedules -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Alpha learning curves')
    for i, y in enumerate(results['log_alphas']):
        ax.plot(y, 'o-', label="Meta iter {0}".format(i))
    ax.set_xlabel('Primal iter number')
    #ax.set_ylabel('Log alpha')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(212)
    ax.set_title('Beta learning curves')
    for y in results['invlogit_betas']:
        ax.plot(y, 'o-')
    ax.set_xlabel('Primal iter number')
    ax.set_ylabel('Inv logit beta')
    plt.savefig('alpha_beta_curves.png')

    # ----- Init scale and L2 reg -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log param scale')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(212)
    ax.set_title('L2 reg learning curves')
    for i, y in enumerate(zip(*results['log_L2_reg'])):
        ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log L2 reg')
    ax.legend(loc=1, frameon=False)
    plt.savefig('scale_and_reg.png')
Example #15
def d_logit(x):
    return logit(x) * (1 - logit(x))
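
Given that `logit` acts as a sigmoid here, `d_logit` is its derivative. A quick finite-difference sanity check (a sketch, assuming the sigmoid-style `logit` sketched in Example #13) is:

import numpy as np

def check_d_logit(x, eps=1e-6):
    # Compare d_logit against a central-difference derivative of logit.
    numeric = (logit(x + eps) - logit(x - eps)) / (2 * eps)
    return np.allclose(d_logit(x), numeric, rtol=1e-4)

assert check_d_logit(0.3)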
Example #16
def plot():

    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('font',**{'family':'serif'})

    with open('results.pkl') as f:
        results, parser, parsed_init_hypergrad = pickle.load(f)

    #rc('text', usetex=True)

    # ----- Small versions of stepsize schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    def layer_name(weight_key):
        return "Layer {num}".format(num=weight_key[1] + 1)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Learning rate')
    ax.set_xlabel('Schedule index')
    fig.set_size_inches((6,2.5))
    ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('schedules_small.pdf', pad_inches=0.05, bbox_inches='tight')


    # ----- Alpha and beta initial hypergradients -----
    print "Plotting initial gradients..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(412)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')

    ax = fig.add_subplot(413)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(414)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')

    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('initial_gradient.png')
    plt.savefig('initial_gradient.pdf', pad_inches=0.05, bbox_inches='tight')


    # ----- Nice versions of Alpha and beta schedules for paper -----
    print "Plotting full alpha and beta schedules curves..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(412)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')

    ax = fig.add_subplot(413)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(414)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')



    print "Plotting learning curves..."
    fig.clf()
    fig.set_size_inches((6,8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude')
    ax.plot(results['meta_grad_magnitude'], 'o-', label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)

    plt.savefig('learning_curves.png')



    # ----- Nice learning curves for paper -----
    print "Plotting nice learning curves for paper..."
    fig.clf()
    # ----- Primal learning curves -----
    ax = fig.add_subplot(111)
    #ax.set_title('Primal learning curves')
    ax.plot(results['learning_curves'][0]['iteration'],
            results['learning_curves'][0]['learning_curve'],  '-', label='Initial hypers')
    ax.plot(results['learning_curves'][-1]['iteration'],
            results['learning_curves'][-1]['learning_curve'], '-', label='Final hypers')
    ax.set_xlabel('Training iteration')
    ax.set_ylabel('Training loss')
    fig.set_size_inches((2.5,2.5))
    ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'10'})
    plt.savefig('learning_curves_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    ax = fig.add_subplot(111)
    #ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    loss_names = ['Training loss', 'Validation loss', 'Test loss']
    for loss_type, loss_name in zip(losses, loss_names):
        ax.plot(results[loss_type], 'o-', label=loss_name)
    ax.set_xlabel('Meta iteration')
    ax.set_ylabel('Predictive loss')
    ax.legend(loc=1, frameon=False)
    fig.set_size_inches((2.5,2.5))
    ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'10'})
    plt.savefig('meta_learning_curve_paper.pdf', pad_inches=0.05, bbox_inches='tight')





    # ----- Extra learning curve info -----
    print "Plotting extra learning curves..."
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')

    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')

    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')





    # ----- Init scale and L2 reg -----
    print "Plotting initialization distributions and regularization..."
    fig.clf()
    ax = fig.add_subplot(111)
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'weights':
            ax.plot(np.exp(y), 'o-', label='')#layer_name(parser.names[i]))
    ax.set_xlabel('Meta iteration')
    #ax.set_ylabel('Initial scale')
    # Show lines for theoretical optimum.
    y1 = 1.0/np.sqrt(layer_sizes[0])
    y2 = 1.0/np.sqrt(layer_sizes[1])
    ax.plot(ax.get_xlim(), (y2, y2), 'k--', label=r'$1/\sqrt{50}$')
    ax.plot(ax.get_xlim(), (y1, y1), 'b--', label=r'$1/\sqrt{784}$')
    ax.set_yticks([0.00, 1.0/np.sqrt(784), 0.10, 1.0/np.sqrt(50), 0.20, 0.25])
    ax.set_yticklabels(['0.00', r"$1 / \sqrt{784}$", "0.10",
                        r"$1 / \sqrt{50}$", "0.20", "0.25"])
    fig.set_size_inches((2.5,2.5))
    #ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('init_weight_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    ax = fig.add_subplot(111)
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'biases':
            ax.plot(np.exp(y), 'o-', label=layer_name(parser.names[i]))
    ax.set_xlabel('Meta iteration')
    ax.set_ylabel('Initial scale')
    fig.set_size_inches((2.5,2.5))
    ax.legend(numpoints=1, loc=0, frameon=False, prop={'size':'10'})
    plt.savefig('init_bias_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')
Example #17
def test_logit():
    assert np.allclose(logit(0), 0.5, rtol=1e-3, atol=1e-4)
    assert np.allclose(logit(-100), 0, rtol=1e-3, atol=1e-4)
    assert np.allclose(logit( 100), 1, rtol=1e-3, atol=1e-4)
Example #18
def plot():

    import matplotlib.pyplot as plt
    with open('results.pkl') as f:
        results, parser, parsed_init_hypergrad = pickle.load(f)

    # ----- Alpha and beta initial hypergradients -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=name)
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(412)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=name)
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')

    ax = fig.add_subplot(413)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=name)
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(414)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=name)
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')

    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('initial_gradient.png')
    plt.savefig('initial_gradient.pdf', pad_inches=0.05, bbox_inches='tight')


    # ----- Nice versions of Alpha and beta schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=name)
    #ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(412)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(logit(cur_results), 'o-', label=name)
    low, high = ax.get_ylim()
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')

    ax = fig.add_subplot(413)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(np.exp(cur_results), 'o-', label=name)
    #ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(414)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(logit(cur_results), 'o-', label=name)
    low, high = ax.get_ylim()
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')


    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    fig.set_size_inches((6,8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude')
    ax.plot(results['meta_grad_magnitude'], 'o-', label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)

    plt.savefig('learning_curves.png')

    # ----- Learning curve info -----
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')

    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')

    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Alpha and beta schedules -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Alpha learning curves')
    for i, y in enumerate(results['log_alphas']):
        ax.plot(y, 'o-', label="Meta iter {0}".format(i))
    ax.set_xlabel('Primal iter number')
    #ax.set_ylabel('Log alpha')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(212)
    ax.set_title('Beta learning curves')
    for y in results['invlogit_betas']:
        ax.plot(y, 'o-')
    ax.set_xlabel('Primal iter number')
    ax.set_ylabel('Inv logit beta')
    plt.savefig('alpha_beta_curves.png')

    # ----- Init scale and L2 reg -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'weights':
            ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log param scale')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(212)
    ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'biases':
            ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log param scale')
    ax.legend(loc=1, frameon=False)

    plt.savefig('scale_and_reg.png')
Example #19
def loss(W_vect, X=0.0, T=0.0, L2_reg=0.0):
    return 800 * logit(rosenbrock(W_vect) / 500)
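
The `rosenbrock` helper is not shown in this example. A standard Rosenbrock definition that would fit this toy loss (a guess at the helper, not the codebase's verified implementation) is:

import numpy as np

def rosenbrock(w):
    # Classic Rosenbrock "banana" function over a flat parameter vector.
    return np.sum(100.0 * (w[1:] - w[:-1] ** 2) ** 2 + (1.0 - w[:-1]) ** 2)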
Example #20
def d_logit(x):
    return logit(x) * (1 - logit(x))
Example #21
def loss(W_vect, X=0.0, T=0.0, L2_reg=0.0):
    return 800 * logit(rosenbrock(W_vect) / 500)
Example #22
def plot():

    import matplotlib.pyplot as plt
    with open('results.pkl') as f:
        results, parser = pickle.load(f)

    # Fake data
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title("Fake Data")
    images = results['fake_data'][-1]
    plot_mnist_images(images, ax, ims_per_row=10)
    fig.set_size_inches((8, 12))
    plt.savefig('fake_data.pdf', pad_inches=0.05, bbox_inches='tight')

    # Show first layer filters from the last meta-iteration.
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title("Weights")
    weights = results['example_weights']
    parser.vect = weights
    weight_images = parser[('weights', 0)].T
    plot_mnist_images(weight_images, ax, ims_per_row=10)
    fig.set_size_inches((8, 12))
    plt.savefig('first_layer_weights.pdf',
                pad_inches=0.05,
                bbox_inches='tight')

    # ----- Nice versions of Alpha and beta schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=name)
    #ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1,
              loc=1,
              frameon=False,
              bbox_to_anchor=(1.0, 0.5),
              prop={
                  'family': 'serif',
                  'size': '12'
              })

    ax = fig.add_subplot(412)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['invlogit_betas'][-1].T,
                                 parser.names):
        if name[0] == 'weights':
            ax.plot(logit(cur_results), 'o-', label=name)
    low, high = ax.get_ylim()
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')

    ax = fig.add_subplot(413)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(np.exp(cur_results), 'o-', label=name)
    #ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1,
              loc=1,
              frameon=False,
              bbox_to_anchor=(1.0, 0.5),
              prop={
                  'family': 'serif',
                  'size': '12'
              })

    ax = fig.add_subplot(414)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['invlogit_betas'][-1].T,
                                 parser.names):
        if name[0] == 'biases':
            ax.plot(logit(cur_results), 'o-', label=name)
    low, high = ax.get_ylim()
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')

    fig.set_size_inches((6, 8))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    fig.set_size_inches((6, 8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    #ax.set_title('Primal learning curves')
    #for i, y in enumerate(results['learning_curves']):
    #    ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    #ax.set_xlabel('Epoch number')
    #ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude')
    ax.plot(results['meta_grad_magnitude'],
            'o-',
            label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)

    plt.savefig('learning_curves.png')

    # ----- Learning curve info -----
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')

    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')

    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Alpha and beta schedules -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Alpha learning curves')
    for i, y in enumerate(results['log_alphas']):
        ax.plot(y, 'o-', label="Meta iter {0}".format(i))
    ax.set_xlabel('Primal iter number')
    #ax.set_ylabel('Log alpha')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(212)
    ax.set_title('Beta learning curves')
    for y in results['invlogit_betas']:
        ax.plot(y, 'o-')
    ax.set_xlabel('Primal iter number')
    ax.set_ylabel('Inv logit beta')
    plt.savefig('alpha_beta_curves.png')

    # ----- Init scale and L2 reg -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'weights':
            ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log param scale')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(212)
    ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'biases':
            ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log param scale')
    ax.legend(loc=1, frameon=False)

    plt.savefig('scale_and_reg.png')