def main(file_path=None, logspace=False, plot_list=None):
    """Aggregate a results csv and save loss plots.

    Loads a gzipped results csv (the most recent one in
    ./sirf/output/csv/ when file_path is None), groups rows by
    (method, samples), and writes two pdf figures to
    ./sirf/output/plots/: one with all tracked losses and one with the
    important ("performance") losses.

    Parameters
    ----------
    file_path : str or None
        Path to a .csv.gz results file; None selects the newest file.
    logspace : bool
        If True, plot losses on a log scale and tag the filename.
    plot_list : list of str or None
        Methods to plot; defaults to every method present in the data.
    """
    if file_path is None:
        path_list = glob("./sirf/output/csv/*.csv.gz")
        # filenames start with a timestamp, so lexicographic max == newest
        file_path = sorted(path_list)[-1]

    with util.openz(file_path) as lf:
        data = pandas.read_csv(lf)

    # keep the 'yymmdd.HHMMSS' timestamp prefix of the file name
    time_stamp = os.path.basename(file_path)
    time_stamp = ".".join(time_stamp.split(".")[0:2])

    gb = data.groupby(["method", "samples"], axis=0)
    # number of rows (runs) aggregated into each (method, samples) group
    d_group_samples = dict(izip(gb.groups.keys(), [len(x) for x in gb.groups.values()]))

    mean_data = gb.mean()
    std_data = gb.std()

    md = mean_data.reset_index()
    sd = std_data.reset_index()
    methods = md.method.unique()
    # raw string: '\S' must reach the regex engine, not be a Python escape
    losses = md.filter(regex=r"test-\S+|true-\S+|sample-reward").columns.unique()

    keys = ["alpha", "gamma", "k", "lambda", "size", "regularization"]  # min improvement, beta, patience, maxiter?
    assert set(keys).issubset(set(data.columns))
    vals = [data.xs(0)[x] for x in keys]  # get first row values for keys
    d_params = dict(izip(keys, vals))  # for output strings later

    if plot_list is None:
        plot_list = methods.tolist()

    plot_all_losses(md, sd, losses, methods, d_group_samples, d_params, plot_list, logspace=logspace)

    fig_path = out_string(
        "./sirf/output/plots/",
        "%s.n_samples_all_losses%s" % (time_stamp, "" if not logspace else "_logspace"),
        d_params,
        ".pdf",
    )
    plt.savefig(fig_path)

    # second figure: reuse the same pyplot figure after clearing it
    plt.clf()
    plot_important_losses(md, sd, losses, methods, d_group_samples, d_params)
    fig_path = out_string("./sirf/output/plots/", "%s.n_samples_performance" % time_stamp, d_params, ".pdf")
    plt.savefig(fig_path)
def main(workers = 0,
         k = 36,
         encoding = 'tile',
         env_size = 9,
         n_runs = 1,
         n_reward_samples = 2500,
         n_reward_runs = 25,
         lam = 0.,
         gam = 0.995,
         beta = 0.995,
         alpha = 1.,
         eta = 0.5,
         eps = 1e-5, 
         patience = 16,
         max_iter = 8, 
         l1theta = None,
         l1code = None,
         l2code = None,
         n_samples = None,
         nonlin = None,
         nonzero = None,
         training_methods = None,
         min_imp = 0.0001,
         min_delta = 1e-6,
         fldir = '/scratch/cluster/ccor/feature-learning/',
         req_rew = False,
         record_runs = False,
         ):
    """Run basis-learning experiments on a grid world, streaming rows to csv.

    For each sample size in n_samples (or the single full-information case
    when n_samples is None), each of n_runs random restarts, and each
    training method, one train_basis job is dispatched through condor; each
    job returns one (param values, losses) row which is appended to a
    gzipped csv under <fldir>/sirf/output/csv/.

    NOTE(review): a plotting main() is also defined earlier in this module;
    this later definition shadows it at import time — confirm intended.

    Parameters (selection)
    ----------
    workers : int
        Number of condor workers (0 appears to run jobs locally).
    k : int
        Number of features (columns of theta) to learn.
    encoding : str
        Raw-state encoder: 'tabular', 'tile', or 'factored' (unimplemented).
    n_samples : str or None
        Comma-separated sample sizes, e.g. '100,500'; None uses perfect
        model information instead of sampled trajectories.
    l1theta, l1code, l2code : float or None
        At most one regularizer; the last non-None of these wins.
    req_rew : bool
        Require sampled trajectories to contain nonzero reward.
    """
    if n_samples:
        # parse a comma-separated string like '100,500' into ints
        n_samples = map(int, n_samples.split(','))

    beta_ratio = beta/gam 
    # append reward to basis when using perfect info?
    if training_methods is None:
        training_methods = [
            (['prediction', 'bellman'], [['theta-all'],['theta-all','w']]),
            (['bellman', 'prediction', 'bellman'], [['theta-all','w'], ['theta-all'],['theta-all','w']]),
            (['full_covariance', 'bellman'], [['theta-all'],['theta-all','w']]),
            (['covariance', 'bellman'], [['theta-all'],['theta-all','w']]),
            (['full_laplacian'], [['theta-all', 'w']]), 
            (['laplacian'], [['theta-all', 'w']]), 
            (['bellman'], [['theta-all', 'w']]), # baseline
            ]  

    # test-* losses only make sense when there is held-out sampled data
    losses = ['sample-reward', 'test-lsbellman', 'test-bellman', 'test-reward',  'test-model', 'test-fullmodel', # test-training
              'true-policy', 'true-policy-uniform', 'true-bellman', 'true-lsbellman', 'true-reward', 'true-model', 'true-fullmodel', 'true-lsq'] \
                if n_samples else \
             ['sample-reward', 'true-policy-uniform', 'true-policy', 'true-bellman',  'true-lsbellman', 'true-reward', 'true-model', 'true-fullmodel', 'true-lsq'] 

    logger.info('building environment of size %i' % env_size)
    mdp = grid_world.MDP(walls_on = True, size = env_size)
    env = mdp.env
    n_states = env_size**2
    m = Model(mdp.R, mdp.P, gam = gam)
    
    # create raw data encoder (constant appended in encoder by default).
    # '==' not 'is': identity comparison of strings only works by CPython
    # interning accident and fails for dynamically-built strings.
    if encoding == 'tabular':
        encoder = TabularFeatures(env_size, append_const = True)
    elif encoding == 'tile':
        encoder = TileFeatures(env_size, append_const = True)
    elif encoding == 'factored':
        raise NotImplementedError
    else:
        raise ValueError('unknown encoding: %s' % encoding)

    def sample(n):
        # Draw train/validation/test trajectories from the grid world and
        # return ((X, X_val, X_test), (R, R_val, R_test), weighting).
        logger.info('sampling from a grid world')
        # currently defaults to on-policy sampling
        
        # mdp returns n+1 states and n rewards, so sample enough extra steps
        # to cover the discount horizon
        n_extra = calc_discount_horizon(lam, gam, eps) - 1
        kw = dict(n_samples = n + n_extra, encoder = encoder, req_rew = req_rew) 
        R, X, _ = mdp.sample_encoding(**kw)
        
        if req_rew:
            logger.info('reward required')
            assert sum(R.todense()) > 0

        logger.info('reward sum: %.2f' % sum(R.todense()))

        R_val, X_val, _ = mdp.sample_encoding(**kw)
        R_test, X_test, _ = mdp.sample_encoding(**kw)
        weighting = 'policy'

        return (X, X_val, X_test), (R, R_val, R_test), weighting
       
    def full_info():
        # Build exact data from the model: stacked matrices I, P, P^2, ...
        # and the corresponding discounted rewards; val/test alias train.
        logger.info('using perfect information')
        R = numpy.array([])
        S = sp.eye(n_states, n_states)
        P = sp.eye(n_states, n_states)
        for i in xrange(calc_discount_horizon(lam, gam, eps)): # decay epsilon 
            R = numpy.append(R, P * m.R)
            P = m.P * P
            S = sp.vstack((S, P))
        
        X = encoder.encode(S)   
        R = sp.csr_matrix(R[:,None])
        X_val = X_test = X
        R_val = R_test = R
        weighting = 'uniform'

        return (X, X_val, X_test), (R, R_val, R_test), weighting

    
    # at most one regularizer; later options take precedence
    reg = None
    if l1theta is not None:
        reg = ('l1theta', l1theta)
    if l1code is not None:
        reg = ('l1code', l1code)
    if l2code is not None:
        reg = ('l2code', l2code)

    run_param_keys = ['k','method','encoding','samples', 'reward_samples', 'reward_runs', 'size','weighting',
                      'lambda','gamma','alpha', 'eta', 'regularization','nonlinear']
    def yield_jobs(): 
        # Generate (train_basis, args) tuples for condor: one job per
        # (sample size, run, training method).
        for i,n in enumerate(n_samples or [n_states]):
            
            logger.info('creating job with %i samples/states' % n)
            
            # build bellman operator matrices
            logger.info('making mixing matrices')
            Mphi, Mrew = BellmanBasis.get_mixing_matrices(n, lam, gam, 
                                    sampled = bool(n_samples), eps = eps)
            
            for r in xrange(n_runs):

                n_features = encoder.n_features
                # initialize theta with unit-norm columns, w with unit norm
                theta_init = numpy.random.standard_normal((n_features, k))
                theta_init /= numpy.sqrt((theta_init * theta_init).sum(axis=0))
                w_init = numpy.random.standard_normal((k+1,1)) 
                w_init = w_init / numpy.linalg.norm(w_init)
                

                # sample or gather full info data
                X_data, R_data, weighting = sample(n) if n_samples else full_info()

                bb_params = [n_features, [k], beta_ratio]
                bb_dict = dict( alpha = alpha, reg_tuple = reg, nonlin = nonlin,
                                nonzero = nonzero, thetas = [theta_init])
        
                for j, tm in enumerate(training_methods):
                    loss_list, wrt_list = tm
                    assert len(loss_list) == len(wrt_list)
                    
                    run_param_values = [k, tm, encoder, n, 
                                n_reward_samples, n_reward_runs, 
                                env_size, weighting, 
                                lam, gam, alpha, eta, 
                                reg[0]+str(reg[1]) if reg else 'None',
                                nonlin if nonlin else 'None']

                    d_run_params = dict(izip(run_param_keys, run_param_values))
                     
                    yield (train_basis,[d_run_params, bb_params, bb_dict,
                                        env, m, losses, # environment, model and loss list
                                        X_data, R_data, Mphi, Mrew, # training data
                                        max_iter, patience, min_imp, min_delta, # optimization params 
                                        fldir, record_runs]) # recording params
    # create output file path
    date_str = time.strftime('%y%m%d.%X').replace(':','')
    out_dir = fldir + 'sirf/output/csv/'
    root =  '%s.%s_results' % (
               date_str, 
               'n_samples' if n_samples else 'full_info')
    
    

    d_experiment_params = dict(izip(['k','encoding','size',
                      'lambda','gamma','alpha','regularization','nl'], 
                      [k, encoder, env_size, lam, gam, alpha, 
                              reg[0]+str(reg[1]) if reg else 'None',
                              nonlin if nonlin else 'None']))
    save_path = out_string(out_dir, root, d_experiment_params, '.csv.gz')
    logger.info('saving results to %s' % save_path)
    
    # get column title list ordered params | losses using dummy dicts
    d_param = dict(izip(run_param_keys, numpy.zeros(len(run_param_keys))))
    d_loss = dict(izip(losses, numpy.zeros(len(run_param_keys))))
    col_keys_array,_ = reorder_columns(d_param, d_loss)

    with openz(save_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(col_keys_array)

        # run (possibly distributed) jobs and append one result row each
        for (_, out) in condor.do(yield_jobs(), workers):
            keys, vals = out
            assert (keys == col_keys_array).all() # todo catch
            writer.writerow(vals)
def experiment(workers = 80, n_runs = 9, k = 16, env_size = 15, gam = 0.998, lam = 0., eps = 1e-5,  
    partition = None, patience = 8, max_iter = 8, weighting = 'uniform', reward_init = False,
    nonlin = 1e-8, n_samples = None, beta_ratio = 1.,
    training_methods = None):
    """Sweep sample sizes x runs x training methods on a grid world.

    Dispatches one condor_job per (sample size, run, method), collects the
    tracked losses into d_loss_data, pickles the results, and saves a grid
    of mean +/- std loss curves (one subplot per loss) as a pdf.

    Parameters (selection)
    ----------
    workers : int
        Number of condor workers.
    reward_init : bool
        Initialize the last feature column to the reward vector and use
        'theta-model' (rather than 'theta-all') wrt-lists.
    n_samples : sequence of int or None
        Sample sizes to sweep; defaults to 6 sizes linearly spaced in
        [50, 1500].
    """
    if training_methods is None:
        # note: for each loss string, you need a corresponding wrt list
        if reward_init:
            training_methods = [
            (['prediction'],[['theta-model']]),
            (['prediction', 'layered'], [['theta-model'],['theta-model','w']]),
            (['covariance'],[['theta-model']]), # with reward, without fine-tuning
            (['covariance', 'layered'], [['theta-model'],['theta-model','w']]), # theta-model here for 2nd wrt?
            (['layered'], [['theta-all','w']])] # baseline
        
        else:
            training_methods = [(['prediction'],[['theta-all']]),
            (['prediction', 'layered'], [['theta-all'],['theta-all','w']]),
            (['covariance'],[['theta-all']]), # with reward, without fine-tuning
            (['covariance', 'layered'], [['theta-all'],['theta-all','w']]), # theta-model here for 2nd wrt?
            (['layered'], [['theta-all','w']])] # baseline
    
    # theano housekeeping: allow concurrent compilation, silence warnings
    theano.gof.compilelock.set_lock_status(False)
    theano.config.on_unused_input = 'ignore'
    theano.config.warn.sum_div_dimshuffle_bug = False

    if n_samples is None:
        n_samples = numpy.round(numpy.linspace(50,1500,6)).astype(int) 

    if partition is None:
        # reserve one feature column for the reward, k-1 for the model
        partition = {'theta-model':k-1, 'theta-reward':1}

    mdp = grid_world.MDP(walls_on = True, size = env_size)    
    m = Model(mdp.env.R, mdp.env.P, gam = gam)
    dim = env_size**2

    # tracked losses
    losses = ['test-bellman', 'test-reward', 'test-model', 'true-bellman', 'true-lsq'] 
    logger.info('losses tracked: '+ str(losses))
    
    # result tensor per loss: (sample size, run, training method)
    d_loss_data = {}
    for key in losses:
        d_loss_data[key] = numpy.zeros((len(n_samples), n_runs, len(training_methods)))

    def yield_jobs():
        # Generate (condor_job, args) tuples, one per (i, r, j) index into
        # the d_loss_data tensors.
        for i,n in enumerate(n_samples):
            
            Mphi, Mrew = BellmanBasis.get_mixing_matrices(n, lam, gam, sampled = True, eps = eps)

            for r in xrange(n_runs):
                
                # initialize features with unit norm
                theta_init = numpy.random.standard_normal((dim+1, k))
                if reward_init:
                    theta_init[:-1,-1] = m.R # XXX set last column to reward
                    theta_init[-1,-1] = 0    # zero the bias row of that column
                theta_init /= numpy.sqrt((theta_init * theta_init).sum(axis=0))

                w_init = numpy.random.standard_normal((k+1,1)) 
                w_init = w_init / numpy.linalg.norm(w_init)

                # sample data: training, validation, and test sets; append
                # the final next-state so each set has n+1 states
                S, Sp, R, _, = mdp.sample_grid_world(n, distribution = weighting); 
                S = numpy.vstack((S, Sp[-1,:]))
                S_val, Sp_val, R_val, _, = mdp.sample_grid_world(n, distribution = weighting)
                S_val = scipy.sparse.vstack((S_val, Sp_val[-1,:]))
                S_test, Sp_test, R_test, _, = mdp.sample_grid_world(n, distribution = weighting)
                S_test = scipy.sparse.vstack((S_test, Sp_test[-1,:]))
                
                bb = BellmanBasis(dim+1, k, beta_ratio, partition = partition, 
                    theta = theta_init, w = w_init, record_loss = losses, nonlin = nonlin)
                
                for j,tm in enumerate(training_methods):
                    
                    yield (condor_job,[(i,r,j), bb, m, tm, 
                            S, R, S_val, R_val, S_test, R_test,
                            Mphi, Mrew, patience, max_iter, weighting])

    # aggregate the condor data
    for (_, result) in condor.do(yield_jobs(), workers):
            d_batch_loss, ind_tuple = result
            for name in d_batch_loss.keys():
                d_loss_data[name][ind_tuple] = d_batch_loss[name]

    # save results! 
    pi_root = 'n_samples_results_rinit' if reward_init else 'n_samples_results'    
    out_path = os.getcwd()+'/sirf/output/pickle/%s.no_r.k=%i.l=%s.g=%s.%s.size=%i.r=%i..pickle.gz' \
                    % (pi_root, k, str(lam), str(gam), weighting, env_size, n_runs)
    logger.info('saving results to %s' % out_path)
    with util.openz(out_path, "wb") as out_file:
        pickle.dump(d_loss_data, out_file, protocol = -1)
    
    x = numpy.array(n_samples, dtype = numpy.float64)
    f = plt.figure()
    logger.info('plotting')
    plot_styles = ['r-', 'b-', 'g-', 'k-', 'c-', 'm-']
    for i,(key,mat) in enumerate(d_loss_data.items()):

        ax = f.add_subplot(2,3,i+1) # todo generalize for arb length 
        
        for h,tm in enumerate(training_methods):                

            std = numpy.std(mat[:,:,h], axis=1)
            mn = numpy.mean(mat[:,:,h], axis=1)
            if 'test' in key:
                # test losses scale with sample count; normalize per sample
                mn = mn/x
                std = std/x
            ax.fill_between(x, mn-std, mn+std, facecolor='yellow', alpha=0.15)
            ax.plot(x, mn, plot_styles[h], label = str(tm[0]))
            plt.title(key)
    
    pl_root = 'n_samples_rinit' if reward_init else 'n_samples'
    # bug fix: arguments reordered to match the format string — pl_root
    # belongs to the leading %s; previously pl_root (a str) landed on an
    # %i placeholder, raising TypeError at runtime
    plt.savefig(os.getcwd()+'/sirf/output/plots/%s.n=%i-%i.k=%i.l=%s.g=%s.%s.size=%i.r=%i.pdf' 
        % (pl_root, n_samples[0], n_samples[-1], k, 
        str(lam), str(gam), weighting, env_size, n_runs))