Example #1
import os

import autograd
import matplotlib.pyplot as plt
import numpy as np

# sgd, LinregModel, make_data_linreg_1d, nsubplots, config_to_str and the
# plot_* helpers are local modules from the original project.


def main():
    np.random.seed(1)
    xtrain, ytrain, params_true = make_data_linreg_1d()
    N = xtrain.shape[0]
    Xtrain = np.c_[np.ones(N), xtrain]  # prepend a column of 1s for the bias term
    w_ols, loss_ols = LinregModel.ols_fit(Xtrain, ytrain)
    
    expt_configs = make_expt_config(N)
    nexpts = len(expt_configs)
    print(nexpts)
    
    nrows, ncols = nsubplots(nexpts)
    #nrows, ncols = 4, 2
    loss_trace_fig = plt.figure("loss trace fig")
    param_trace_fig = plt.figure("param trace fig")
    folder = 'figures'
    
    for expt_num, config in enumerate(expt_configs):
        logger = sgd.SGDLogger(print_freq=10)
        np.random.seed(1)
        batchifier = sgd.MiniBatcher(Xtrain, ytrain, config['batch_size'])
        initial_params = np.zeros(2)
        lr_fun = lambda iter: sgd.get_learning_rate_exp_decay(
            iter, config['init_lr'], config['lr_decay'])
       
        ttl = config_to_str(config)
        print('\nstarting experiment {}'.format(ttl))
        print(config)

        obj_fun = LinregModel.objective
        # differentiate the objective with autograd rather than using the
        # hand-coded LinregModel.gradient
        grad_fun = autograd.grad(obj_fun)
        result = sgd.sgd_minimize(initial_params, obj_fun, grad_fun,
            batchifier, config['n_steps'], lr_fun, config['momentum'],
            callback=logger.update)
        print(result)
                
        plotnum = expt_num + 1
        ax = loss_trace_fig.add_subplot(nrows, ncols, plotnum)
        plot_loss_trace(logger.obj_trace, loss_ols, ax)
        ax.set_title(ttl)
        
        ax = param_trace_fig.add_subplot(nrows, ncols, plotnum)
        loss_fun = lambda w0, w1: LinregModel.objective([w0, w1], xtrain, ytrain)
        plot_error_surface(loss_fun, params_true, ax)
        plot_param_trace(logger.param_trace, ax) 
        ax.set_title(ttl)
                
    plt.figure("loss trace fig")         
    fname = os.path.join(folder, 'linreg_1d_sgd_loss_trace.png')
    plt.savefig(fname)
    
    plt.figure("param trace fig")         
    fname = os.path.join(folder, 'linreg_1d_sgd_param_trace.png')
    plt.savefig(fname)
    
    plt.show()
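
The helper sgd.get_learning_rate_exp_decay is not shown in the snippet; a minimal sketch consistent with how it is called here (iteration index, initial rate, decay factor) follows. This is an assumption about its behavior, not the project's actual implementation.

def get_learning_rate_exp_decay(iter, init_lr, lr_decay):
    # hypothetical sketch: geometric decay, assuming 0 < lr_decay <= 1,
    # so the rate after t iterations is init_lr * lr_decay ** t
    return init_lr * lr_decay ** iter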
Example #2
import os

import autograd
import matplotlib.pyplot as plt
import numpy as np

# demo, opt, util, LinregModel, MLP and config_to_str are local modules
# from the original project.


def run_expt(config, loss_opt=0):
    ttl = config_to_str(config)
    print('\nstarting experiment {}'.format(ttl))
    print(config)
    
    Xtrain, Ytrain, params_true, true_fun, fun_name = \
      demo.make_data_linreg_1d(config['N'], config['fun_type'])
    data_dim = Xtrain.shape[1]
    N = Xtrain.shape[0]
    Xtrain, Ytrain = opt.shuffle_data(Xtrain, Ytrain)
        
    model_type = config['model_type']
    if model_type == 'linear':
        model = LinregModel(data_dim, add_ones=True)
        params, loss = model.ols_fit(Xtrain, Ytrain)
    elif model_type[0:3] == 'mlp':
        _, layer_sizes = model_type.split(':')
        layer_sizes = [int(n) for n in layer_sizes.split('-')]
        model = MLP(layer_sizes, 'regression', L2_reg=0.001) 
    else:
        raise ValueError('unknown model type {}'.format(model_type))
            
    initial_params = model.init_params() 
    param_dim = len(initial_params)

    plot_data = (data_dim == 1)
    plot_params = (param_dim == 2)
    nplots = 1
    if plot_data: 
        nplots += 1
    if plot_params:
        nplots += 1
    plot_rows, plot_cols = util.nsubplots(nplots)

    if config['optimizer'] == 'BFGS':
        obj_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
        grad_fun = autograd.grad(obj_fun)
        logger = opt.OptimLogger(obj_fun, eval_freq=1,
                    store_freq=1, print_freq=1)
        params, obj = opt.bfgs(obj_fun, grad_fun, initial_params, config['num_epochs'], 
                          logger.callback)
                         
    if config['optimizer'] == 'SGD':
        B = config['batch_size']
        M = N // B  # number of minibatches per epoch (iterations per epoch)
        max_iters = config['num_epochs'] * M
        
        grad_fun_with_iter = opt.build_batched_grad(model.gradient, config['batch_size'], Xtrain, Ytrain)
        #obj_fun = opt.build_batched_grad(model.PNLL, config['batch_size'], Xtrain, Ytrain)
        obj_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
        sf = config.get('store_freq', M)
        logger = opt.OptimLogger(obj_fun, eval_freq=sf, store_freq=sf, print_freq=0)         
        sgd_fun = config['sgd_fun']
 
        if config.get('lr_tune', False):
            eval_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
            lr, lrs, scores = opt.lr_tuner(eval_fun, 'grid', sgd_fun, grad_fun_with_iter,
                            initial_params, int(np.ceil(max_iters*0.1)))
            print('lr tuner chose lr {:0.3f}'.format(lr))
            print(lrs)
            print(scores)
            config['lr_init'] = lr
            
        lr_fun = lambda iter: opt.lr_exp_decay(iter, config['lr_init'],
                                    config['lr_decay'], config['lr_step']) 
        params, obj = sgd_fun(obj_fun, grad_fun_with_iter, initial_params,
                            max_iters, logger.callback, lr_fun)
    
    training_loss = model.PNLL(params, Xtrain, Ytrain)
    print('finished fitting, training loss {:0.3g}, {} obj calls, {} grad calls'
          .format(training_loss, model.num_obj_fun_calls, model.num_grad_fun_calls))
    
    fig = plt.figure()
    ax = fig.add_subplot(plot_rows, plot_cols, 1)
    opt.plot_loss_trace(logger.eval_trace, loss_opt, ax)
    ax.set_title('final objective {:0.3g}'.format(training_loss))
    ax.set_xlabel('epochs')
    
    if plot_data:
        ax = fig.add_subplot(plot_rows, plot_cols, 2)
        predict_fun = lambda X: model.predictions(params, X)
        demo.plot_data_and_predictions_1d(Xtrain, Ytrain, true_fun, predict_fun, ax)
    
    if plot_params:
        ax = fig.add_subplot(plot_rows, plot_cols, 3)
        loss_fun = lambda w0, w1: model.PNLL(np.array([w0, w1]), Xtrain, Ytrain)
        demo.plot_error_surface_2d(loss_fun, params, params_true, config['fun_type'], ax)
        demo.plot_param_trace_2d(logger.param_trace, ax)        
        
    ttl = config_to_str(config) # recompute in case lr has been estimated
    fig.suptitle(ttl)
    folder = 'figures/linreg-sgd'        
    fname = os.path.join(folder, 'linreg_1d_sgd_{}.png'.format(ttl))
    plt.savefig(fname)
    return training_loss
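
opt.build_batched_grad wraps a full-data gradient so that successive calls evaluate it on successive minibatches. Its implementation is not shown; the sketch below is a plausible reconstruction from the call site (gradient, batch size, inputs, targets) and is an assumption, not the project's code.

def build_batched_grad(grad, batch_size, inputs, targets):
    # hypothetical sketch: call i evaluates the gradient on the i-th
    # minibatch, cycling through the data
    def batched_grad(params, iter):
        num_batches = inputs.shape[0] // batch_size
        b = iter % num_batches
        idx = slice(b * batch_size, (b + 1) * batch_size)
        return grad(params, inputs[idx], targets[idx])
    return batched_grad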
Example #3
import os

import matplotlib.pyplot as plt
import numpy as np

# sgd, util, LinregModel, MLP, make_data_linreg_1d, config_to_str and the
# plot_* helpers are local modules from the original project.


def main():
    np.random.seed(1)
    folder = 'figures/linreg-sgd'

    N = 50
    num_epochs = 100

    #fun_type = 'linear'
    fun_type = 'sine'
    #fun_type = 'quad'

    #model_type = 'linear'
    model_type = 'mlp:1-10-1'

    configs = []
    # BFGS has to be the first config, in order to compute loss_opt
    configs.append({
        'fun_type': fun_type,
        'N': N,
        'model': model_type,
        'optimizer': 'BFGS'
    })

    configs.append({
        'fun_type': fun_type,
        'N': N,
        'model': model_type,
        'optimizer': 'SGD',
        'batch_size': 10,
        'num_epochs': num_epochs,
        'method': 'Rprop',
        'improved': True
    })

    configs.append({
        'fun_type': fun_type,
        'N': N,
        'model': model_type,
        'optimizer': 'SGD',
        'batch_size': 10,
        'num_epochs': num_epochs,
        'lr_fun': 'exp',
        'init_lr': 0.05,
        'lr_decay': 0.9,
        'method': 'momentum',
        'mass': 0.9
    })

    configs.append({
        'fun_type': fun_type,
        'N': N,
        'model': model_type,
        'optimizer': 'SGD',
        'batch_size': 10,
        'num_epochs': num_epochs,
        'lr_fun': 'exp',
        'init_lr': 0.05,
        'lr_decay': 0.9,
        'method': 'RMSprop',
        'grad_sq_decay': 0.9
    })

    configs.append({
        'fun_type': fun_type,
        'N': N,
        'model': model_type,
        'optimizer': 'SGD',
        'batch_size': 10,
        'num_epochs': num_epochs,
        'lr_fun': 'exp',
        'init_lr': 0.05,
        'lr_decay': 0.9,
        'method': 'ADAM',
        'grad_decay': 0.9,
        'grad_sq_decay': 0.999
    })
    configs.append({
        'fun_type': fun_type,
        'N': N,
        'model': model_type,
        'optimizer': 'SGD',
        'batch_size': 10,
        'num_epochs': num_epochs,
        'lr_fun': 'const',
        'init_lr': 0.05,
        'method': 'ADAM',
        'grad_decay': 0.9,
        'grad_sq_decay': 0.999
    })
    configs.append({
        'fun_type': fun_type,
        'N': N,
        'model': model_type,
        'optimizer': 'SGD',
        'batch_size': 10,
        'num_epochs': num_epochs,
        'lr_fun': 'const',
        'init_lr': 0.001,
        'method': 'ADAM',
        'grad_decay': 0.9,
        'grad_sq_decay': 0.999
    })

    params_opt = None
    loss_opt = None
    for expt_num, config in enumerate(configs):
        np.random.seed(1)
        ttl = config_to_str(config)
        print('\nstarting experiment {}'.format(ttl))
        print(config)

        Xtrain, Ytrain, params_true, true_fun, fun_name = make_data_linreg_1d(
            config['N'], config['fun_type'])
        data_dim = Xtrain.shape[1]

        if model_type == 'linear':
            model = LinregModel(data_dim, add_ones=True)
            params_opt, loss_opt = model.ols_fit(Xtrain, Ytrain)
        elif model_type[0:3] == 'mlp':
            _, layer_sizes = model_type.split(':')
            layer_sizes = [int(n) for n in layer_sizes.split('-')]
            model = MLP(layer_sizes, 'regression', L2_reg=0.001)
        else:
            raise ValueError('unknown model type {}'.format(model_type))

        initial_params = model.init_params()
        obj_fun = model.PNLL
        grad_fun = model.gradient

        param_dim = len(initial_params)
        plot_data = (data_dim == 1)
        plot_params = (param_dim == 2)
        nplots = 2
        if plot_data:
            nplots += 1
        if plot_params:
            nplots += 1
        plot_rows, plot_cols = util.nsubplots(nplots)

        if config['optimizer'] == 'BFGS':
            logger = sgd.MinimizeLogger(obj_fun,
                                        grad_fun, (Xtrain, Ytrain),
                                        print_freq=1,
                                        store_params=True)
            params, loss, n_fun_evals = sgd.bfgs_fit(initial_params, obj_fun,
                                                     grad_fun,
                                                     (Xtrain, Ytrain),
                                                     logger.update)
            num_props = n_fun_evals * config['N']
            loss_avg = loss
            if params_opt is None:
                params_opt = params
                loss_opt = loss

        if config['optimizer'] == 'SGD':
            logger = sgd.SGDLogger(print_freq=20, store_params=True)
            if 'lr_fun' in config:
                if config['lr_fun'] == 'exp':
                    lr_fun = lambda iter, epoch: sgd.lr_exp_decay(
                        iter, config['init_lr'], config['lr_decay'])
                elif config['lr_fun'] == 'const':
                    lr_fun = lambda iter, epoch: config['init_lr']
            else:
                lr_fun = None

            if config['method'] == 'momentum':
                sgd_updater = sgd.SGDMomentum(lr_fun, config['mass'])
            elif config['method'] == 'RMSprop':
                sgd_updater = sgd.RMSprop(lr_fun, config['grad_sq_decay'])
            elif config['method'] == 'ADAM':
                sgd_updater = sgd.ADAM(lr_fun, config['grad_decay'],
                                       config['grad_sq_decay'])
            elif config['method'] == 'Rprop':
                sgd_updater = sgd.Rprop(improved_Rprop=config['improved'])

            params, loss, num_minibatch_updates, params_avg, loss_avg = sgd.sgd_minimize(
                initial_params, obj_fun, grad_fun, Xtrain, Ytrain,
                config['batch_size'], config['num_epochs'], sgd_updater,
                logger.update)
            num_props = num_minibatch_updates * config['batch_size']

        print('finished fitting, {} obj, {} grad, {} props'.format(
            model.num_obj_fun_calls, model.num_grad_fun_calls, num_props))

        fig = plt.figure()
        ax = fig.add_subplot(plot_rows, plot_cols, 1)
        plot_loss_trace(logger.obj_trace, loss_opt, ax, num_props)
        ax.set_title('final objective {:0.3f}, {:0.3f}'.format(loss, loss_avg))

        ax = fig.add_subplot(plot_rows, plot_cols, 2)
        ax.plot(logger.grad_norm_trace)
        ax.set_title('gradient norm vs num updates')

        if plot_data:
            ax = fig.add_subplot(plot_rows, plot_cols, 3)
            predict_fun = lambda X: model.predictions(params, X)
            plot_data_and_predictions_1d(Xtrain, Ytrain, true_fun, predict_fun,
                                         ax)

        if plot_params:
            ax = fig.add_subplot(plot_rows, plot_cols, 4)
            loss_fun = lambda w0, w1: model.PNLL([w0, w1], Xtrain, Ytrain)
            plot_error_surface_2d(loss_fun, params_opt, params_true, fun_type,
                                  ax)
            plot_param_trace_2d(logger.param_trace, ax)

        fig.suptitle(ttl)
        fname = os.path.join(folder, 'linreg_1d_sgd_{}.png'.format(ttl))
        plt.savefig(fname)

    plt.show()
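
sgd.ADAM is constructed with a learning-rate schedule plus grad_decay (beta1) and grad_sq_decay (beta2). For reference, a minimal Adam step under those argument names is sketched below; the class name and update signature are assumptions for illustration, not the project's actual updater.

import numpy as np

class AdamSketch(object):
    # hypothetical sketch of an Adam updater matching the constructor
    # arguments used above
    def __init__(self, lr_fun, grad_decay=0.9, grad_sq_decay=0.999, eps=1e-8):
        self.lr_fun, self.b1, self.b2, self.eps = lr_fun, grad_decay, grad_sq_decay, eps
        self.m, self.v, self.t = 0.0, 0.0, 0

    def update(self, params, grad, epoch=0):
        self.t += 1
        self.m = self.b1 * self.m + (1 - self.b1) * grad        # first moment
        self.v = self.b2 * self.v + (1 - self.b2) * grad ** 2   # second moment
        mhat = self.m / (1 - self.b1 ** self.t)                 # bias correction
        vhat = self.v / (1 - self.b2 ** self.t)
        return params - self.lr_fun(self.t, epoch) * mhat / (np.sqrt(vhat) + self.eps)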
Example #4
import os

import autograd
import matplotlib.pyplot as plt
import numpy as np

# same local helpers as Example #1: sgd, LinregModel, make_data_linreg_1d, etc.


def main():
    np.random.seed(1)
    xtrain, ytrain, params_true = make_data_linreg_1d()
    N = xtrain.shape[0]
    Xtrain = np.c_[np.ones(N), xtrain]  # add column of 1s
    w_ols, loss_ols = LinregModel.ols_fit(Xtrain, ytrain)

    expt_configs = make_expt_config(N)
    nexpts = len(expt_configs)
    print(nexpts)

    nrows, ncols = nsubplots(nexpts)
    #nrows, ncols = 4, 2
    loss_trace_fig = plt.figure("loss trace fig")
    param_trace_fig = plt.figure("param trace fig")
    folder = 'figures'

    for expt_num, config in enumerate(expt_configs):
        logger = sgd.SGDLogger(print_freq=10)
        np.random.seed(1)
        batchifier = sgd.MiniBatcher(Xtrain, ytrain, config['batch_size'])
        initial_params = np.zeros(2)
        lr_fun = lambda iter: sgd.get_learning_rate_exp_decay(
            iter, config['init_lr'], config['lr_decay'])

        ttl = config_to_str(config)
        print('\nstarting experiment {}'.format(ttl))
        print(config)

        obj_fun = LinregModel.objective
        #grad_fun = LinregModel.gradient
        grad_fun = autograd.grad(obj_fun)
        result = sgd.sgd_minimize(initial_params,
                                  obj_fun,
                                  grad_fun,
                                  batchifier,
                                  config['n_steps'],
                                  lr_fun,
                                  config['momentum'],
                                  callback=logger.update)
        print(result)

        plotnum = expt_num + 1
        ax = loss_trace_fig.add_subplot(nrows, ncols, plotnum)
        plot_loss_trace(logger.obj_trace, loss_ols, ax)
        ax.set_title(ttl)

        ax = param_trace_fig.add_subplot(nrows, ncols, plotnum)
        loss_fun = lambda w0, w1: LinregModel.objective([w0, w1], xtrain,
                                                        ytrain)
        plot_error_surface(loss_fun, params_true, ax)
        plot_param_trace(logger.param_trace, ax)
        ax.set_title(ttl)

    plt.figure("loss trace fig")
    fname = os.path.join(folder, 'linreg_1d_sgd_loss_trace.png')
    plt.savefig(fname)

    plt.figure("param trace fig")
    fname = os.path.join(folder, 'linreg_1d_sgd_param_trace.png')
    plt.savefig(fname)

    plt.show()
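
sgd.MiniBatcher, the batchifier used here and in Example #1, is not shown. A plausible sketch, assuming it cycles through fixed-size slices of the training data, follows; the method name and signature are guesses for illustration only.

class MiniBatcher(object):
    # hypothetical sketch: yield successive (X, y) minibatches, cycling
    # back to the start once the data is exhausted
    def __init__(self, X, y, batch_size):
        self.X, self.y, self.batch_size = X, y, batch_size
        self.num_batches = X.shape[0] // batch_size

    def next_batch(self, iter):
        b = iter % self.num_batches
        idx = slice(b * self.batch_size, (b + 1) * self.batch_size)
        return self.X[idx], self.y[idx]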
Example #5
import os

import matplotlib.pyplot as plt
import numpy as np

# same local helpers as Example #3: sgd, util, LinregModel, MLP, etc.


def main():
    np.random.seed(1)
    folder = 'figures/linreg-sgd'
    
    N = 50
    num_epochs = 100
   
    #fun_type = 'linear'
    fun_type = 'sine'
    #fun_type = 'quad'
    
    #model_type = 'linear'
    model_type = 'mlp:1-10-1'
                
    configs = []
    # BFGS has to be the first config, in order to compute loss_opt
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type, 
                    'optimizer': 'BFGS'})
                    
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,  
                    'optimizer': 'SGD', 'batch_size': 10,  'num_epochs': num_epochs, 
                    'method': 'Rprop', 'improved': True})             
                                        
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,  
                    'optimizer': 'SGD', 'batch_size': 10,  'num_epochs': num_epochs, 
                    'lr_fun': 'exp', 'init_lr': 0.05, 'lr_decay': 0.9,
                    'method': 'momentum', 'mass': 0.9})  
                    
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,  
                    'optimizer': 'SGD', 'batch_size': 10,  'num_epochs': num_epochs, 
                    'lr_fun': 'exp', 'init_lr': 0.05, 'lr_decay': 0.9,
                    'method': 'RMSprop', 'grad_sq_decay': 0.9})
                    
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,  
                    'optimizer': 'SGD', 'batch_size': 10,  'num_epochs': num_epochs, 
                    'lr_fun': 'exp', 'init_lr': 0.05, 'lr_decay': 0.9, 
                    'method': 'ADAM', 'grad_decay': 0.9, 'grad_sq_decay': 0.999})
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,  
                    'optimizer': 'SGD', 'batch_size': 10,  'num_epochs': num_epochs, 
                    'lr_fun': 'const', 'init_lr': 0.05, 
                    'method': 'ADAM', 'grad_decay': 0.9, 'grad_sq_decay': 0.999})
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,  
                    'optimizer': 'SGD', 'batch_size': 10,  'num_epochs': num_epochs, 
                    'lr_fun': 'const', 'init_lr': 0.001, 
                    'method': 'ADAM', 'grad_decay': 0.9, 'grad_sq_decay': 0.999})
    
    params_opt = None
    loss_opt = None
    for expt_num, config in enumerate(configs):
        np.random.seed(1)
        ttl = config_to_str(config)
        print('\nstarting experiment {}'.format(ttl))
        print(config)
        
        Xtrain, Ytrain, params_true, true_fun, fun_name = make_data_linreg_1d(
            config['N'], config['fun_type'])
        data_dim = Xtrain.shape[1]
        
        if model_type == 'linear':
            model = LinregModel(data_dim, add_ones=True)
            params_opt, loss_opt = model.ols_fit(Xtrain, Ytrain)
        elif model_type[0:3] == 'mlp':
            _, layer_sizes = model_type.split(':')
            layer_sizes = [int(n) for n in layer_sizes.split('-')]
            model = MLP(layer_sizes, 'regression', L2_reg=0.001) 
        else:
            raise ValueError('unknown model type {}'.format(model_type))
                
        initial_params = model.init_params() 
        obj_fun = model.PNLL
        grad_fun = model.gradient
        
        param_dim = len(initial_params)
        plot_data = (data_dim == 1)
        plot_params = (param_dim == 2)
        nplots = 2
        if plot_data: 
            nplots += 1
        if plot_params:
            nplots += 1
        plot_rows, plot_cols = util.nsubplots(nplots)
         
        if config['optimizer'] == 'BFGS':
            logger = sgd.MinimizeLogger(obj_fun, grad_fun, (Xtrain, Ytrain), print_freq=1, store_params=True)
            params, loss, n_fun_evals = sgd.bfgs_fit(initial_params, obj_fun, grad_fun, (Xtrain, Ytrain), logger.update) 
            num_props = n_fun_evals * config['N']
            loss_avg = loss
            if params_opt is None:
                params_opt = params
                loss_opt = loss
                
        if config['optimizer'] == 'SGD':
            logger = sgd.SGDLogger(print_freq=20, store_params=True)
            if 'lr_fun' in config:
                if config['lr_fun'] == 'exp':
                    lr_fun = lambda iter, epoch: sgd.lr_exp_decay(
                        iter, config['init_lr'], config['lr_decay'])
                elif config['lr_fun'] == 'const':
                    lr_fun = lambda iter, epoch: config['init_lr']
            else:
                lr_fun = None
                
            if config['method'] == 'momentum':
                sgd_updater = sgd.SGDMomentum(lr_fun, config['mass'])
            elif config['method'] == 'RMSprop':
                sgd_updater = sgd.RMSprop(lr_fun, config['grad_sq_decay'])
            elif config['method'] == 'ADAM':
                sgd_updater = sgd.ADAM(lr_fun, config['grad_decay'], config['grad_sq_decay'])
            elif config['method'] == 'Rprop':
                sgd_updater = sgd.Rprop(improved_Rprop=config['improved'])
               
            params, loss, num_minibatch_updates, params_avg, loss_avg = sgd.sgd_minimize(
                initial_params, obj_fun, grad_fun, Xtrain, Ytrain,
                config['batch_size'], config['num_epochs'], sgd_updater, logger.update)
            num_props = num_minibatch_updates * config['batch_size']

        print('finished fitting, {} obj, {} grad, {} props'.format(
            model.num_obj_fun_calls, model.num_grad_fun_calls, num_props))

        fig = plt.figure()
        ax = fig.add_subplot(plot_rows, plot_cols, 1)
        plot_loss_trace(logger.obj_trace, loss_opt, ax, num_props)
        ax.set_title('final objective {:0.3f}, {:0.3f}'.format(loss, loss_avg))
        
        ax = fig.add_subplot(plot_rows, plot_cols, 2)
        ax.plot(logger.grad_norm_trace)
        ax.set_title('gradient norm vs num updates')
        
        if plot_data:
            ax = fig.add_subplot(plot_rows, plot_cols, 3)
            predict_fun = lambda X: model.predictions(params, X)
            plot_data_and_predictions_1d(Xtrain, Ytrain, true_fun, predict_fun, ax)
        
        if plot_params:
            ax = fig.add_subplot(plot_rows, plot_cols, 4)
            loss_fun = lambda w0, w1: model.PNLL([w0, w1], Xtrain, Ytrain)
            plot_error_surface_2d(loss_fun, params_opt, params_true, fun_type, ax)
            plot_param_trace_2d(logger.param_trace, ax)        
         
        fig.suptitle(ttl)        
        fname = os.path.join(folder, 'linreg_1d_sgd_{}.png'.format(ttl))
        plt.savefig(fname)
    
    plt.show()
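
sgd.RMSprop takes the learning-rate schedule and grad_sq_decay. A minimal sketch of the standard RMSprop step under those argument names is shown below; as with the other sketches, the class layout is an assumption rather than the project's actual code.

import numpy as np

class RMSpropSketch(object):
    # hypothetical sketch of the RMSprop updater used above
    def __init__(self, lr_fun, grad_sq_decay=0.9, eps=1e-8):
        self.lr_fun, self.decay, self.eps = lr_fun, grad_sq_decay, eps
        self.avg_sq_grad, self.t = 0.0, 0

    def update(self, params, grad, epoch=0):
        self.t += 1
        # exponential moving average of the squared gradient
        self.avg_sq_grad = (self.decay * self.avg_sq_grad
                            + (1 - self.decay) * grad ** 2)
        return params - self.lr_fun(self.t, epoch) * grad / (np.sqrt(self.avg_sq_grad) + self.eps)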
Example #6
import os

import autograd
import matplotlib.pyplot as plt
import numpy as np

# demo, opt, util, LinregModel, MLP and config_to_str are local modules
# from the original project.


def run_expt(config, loss_opt=0):
    ttl = config_to_str(config)
    print('\nstarting experiment {}'.format(ttl))
    print(config)
    
    Xtrain, Ytrain, params_true, true_fun, fun_name = demo.make_data_linreg_1d(config['N'], config['fun_type'])
    data_dim = Xtrain.shape[1]
    N = Xtrain.shape[0]
    Xtrain, Ytrain = opt.shuffle_data(Xtrain, Ytrain)
        
    model_type = config['model_type']
    if model_type == 'linear':
        model = LinregModel(data_dim, add_ones=True)
        params, loss = model.ols_fit(Xtrain, Ytrain)
    elif model_type[0:3] == 'mlp':
        _, layer_sizes = model_type.split(':')
        layer_sizes = [int(n) for n in layer_sizes.split('-')]
        model = MLP(layer_sizes, 'regression', L2_reg=0.001, Ntrain=N) 
    else:
        raise ValueError('unknown model type {}'.format(model_type))
            
    initial_params = model.init_params() 
    param_dim = len(initial_params)

    plot_data = (data_dim == 1)
    plot_params = (param_dim == 2)
    nplots = 2
    if plot_data: 
        nplots += 1
    if plot_params:
        nplots += 1
    plot_rows, plot_cols = util.nsubplots(nplots)
    
    if config['optimizer'] == 'BFGS':
        obj_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
        logger = opt.OptimLogger(lambda params, iter: obj_fun(params), store_freq=1, print_freq=10)   
        params = opt.bfgs(autograd.value_and_grad(obj_fun), initial_params, logger.callback, config['num_epochs'])
                     
    if config['optimizer'] == 'SGD':
        B = config['batch_size']
        M = N // B  # number of minibatches per epoch (iterations per epoch)
        max_iters = config['num_epochs'] * M
        
        grad_fun = opt.build_batched_grad(model.gradient, config['batch_size'], Xtrain, Ytrain)
        #obj_fun = opt.build_batched_grad(model.PNLL, config['batch_size'], Xtrain, Ytrain)
        obj_fun = lambda params, iter: model.PNLL(params, Xtrain, Ytrain)
        logger = opt.OptimLogger(obj_fun, store_freq=M, print_freq=M*10, store_params=plot_params)         
            
        if 'lr_fun' in config:
            if config['lr_fun'] == 'exp':
                lr_fun = lambda iter: opt.lr_exp_decay(iter, config['init_lr'], config['lr_decay'])
            elif config['lr_fun'] == 'const':
                lr_fun = opt.const_lr(config['init_lr'])
            else:
                raise ValueError('Unknown lr_fun {}'.format(config['lr_fun']))

        #sgd_fun = config['sgd-fun']
        #params = sgd_fun(grad_fun, initial_params, logger.callback, \
        #    max_iters, lr_fun, *config['args'])
        
        if config['sgd-method'] == 'momentum':
            params = opt.sgd(grad_fun, initial_params, logger.callback,
                             max_iters, lr_fun, config['mass'])
        elif config['sgd-method'] == 'RMSprop':
            params = opt.rmsprop(grad_fun, initial_params, logger.callback,
                                 max_iters, lr_fun, config['grad_sq_decay'])
        elif config['sgd-method'] == 'ADAM':
            params = opt.adam(grad_fun, initial_params, logger.callback,
                              max_iters, lr_fun, config['grad_decay'], config['grad_sq_decay'])
        elif config['sgd-method'] == 'AutoADAM':
            eval_fn = lambda params: model.PNLL(params, Xtrain, Ytrain)
            params, lr, scores = opt.autoadam(grad_fun, initial_params, logger.callback,
                                              max_iters, eval_fn, config['auto-method'])
            config['init_lr'] = lr
            config['lr_fun'] = 'const'
            ttl = config_to_str(config)  # recompute so the title shows the chosen lr
            print('autoadam: chose {:0.3f} as lr'.format(lr))
            print(scores)
        else:
            raise ValueError('Unknown SGD method {}'.format(config['sgd-method']))

    training_loss = model.PNLL(params, Xtrain, Ytrain)
    print('finished fitting, training loss {:0.3f}, {} obj calls, {} grad calls'
          .format(training_loss, model.num_obj_fun_calls, model.num_grad_fun_calls))
    
    fig = plt.figure()
    ax = fig.add_subplot(plot_rows, plot_cols, 1)
    opt.plot_loss_trace(logger.obj_trace, loss_opt, ax)
    ax.set_title('final objective {:0.3f}'.format(training_loss))
    ax.set_xlabel('epochs')
    
    ax = fig.add_subplot(plot_rows, plot_cols, 2)
    ax.plot(logger.grad_norm_trace)
    ax.set_title('gradient norm vs num updates')
    
    if plot_data:
        ax = fig.add_subplot(plot_rows, plot_cols, 3)
        predict_fun = lambda X: model.predictions(params, X)
        demo.plot_data_and_predictions_1d(Xtrain, Ytrain, true_fun, predict_fun, ax)
    
    if plot_params:
        ax = fig.add_subplot(plot_rows, plot_cols, 4)
        loss_fun = lambda w0, w1: model.PNLL(np.array([w0, w1]), Xtrain, Ytrain)
        demo.plot_error_surface_2d(loss_fun, params, params_true, config['fun_type'], ax)
        demo.plot_param_trace_2d(logger.param_trace, ax)        
        
    fig.suptitle(ttl)
    folder = 'figures/linreg-sgd'        
    fname = os.path.join(folder, 'linreg_1d_sgd_{}.png'.format(ttl))
    plt.savefig(fname)
    return training_loss
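
opt.lr_tuner and opt.autoadam pick a learning rate by running a short training burst at each candidate rate and keeping the one with the lowest evaluation loss. A minimal grid-search sketch of that idea follows; the function name, the assumed run_sgd signature (grad, params, callback, iters, lr schedule) and the candidate grid are illustrative assumptions, not the project's API.

import numpy as np

def tune_lr_by_grid(eval_fun, run_sgd, grad_fun, init_params,
                    num_iters, candidates=(1.0, 0.1, 0.01, 0.001)):
    # hypothetical sketch: short SGD burst per candidate rate, keep the best
    scores = []
    for lr in candidates:
        lr_fun = lambda iter, lr=lr: lr  # constant schedule for this trial
        params = run_sgd(grad_fun, init_params.copy(),
                         lambda *a: None,  # no-op logging callback
                         num_iters, lr_fun)
        scores.append(eval_fun(params))
    best = int(np.argmin(scores))
    return candidates[best], list(candidates), scores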