def main(file_path=None, logspace=False, plot_list=None):

    if file_path is None:
        path_list = glob("./sirf/output/csv/*.csv.gz")
        file_path = sorted(path_list)[-1]  # get most recent file

    with util.openz(file_path) as lf:
        data = pandas.read_csv(lf)

    time_stamp = file_path[file_path.rfind("/") + 1:]
    time_stamp = ".".join(time_stamp.split(".")[0:2])

    gb = data.groupby(["method", "samples"], axis=0)
    d_group_samples = dict(izip(gb.groups.keys(),
                                [len(x) for x in gb.groups.values()]))
    mean_data = gb.mean()
    std_data = gb.std()
    md = mean_data.reset_index()
    sd = std_data.reset_index()
    methods = md.method.unique()
    losses = md.filter(regex=r"test-\S+|true-\S+|sample-reward").columns.unique()

    keys = ["alpha", "gamma", "k", "lambda", "size", "regularization"]
    # min improvement, beta, patience, maxiter?
    assert set(keys).issubset(set(data.columns))
    vals = [data.xs(0)[x] for x in keys]  # get first-row values for keys
    d_params = dict(izip(keys, vals))  # for output strings later
    # print 'columns in data: ', md.columns.unique()

    if plot_list is None:
        plot_list = methods.tolist()

    # remove extra losses
    # print plot_list
    # plot_list.remove('covariance')
    # plot_list.remove('prediction')
    # plot_list.remove('value_prediction')
    # plot_list.remove('prediction-layered')
    # plot_list.remove('value_prediction-layered')

    plot_all_losses(md, sd, losses, methods, d_group_samples, d_params,
                    plot_list, logspace=logspace)
    fig_path = out_string(
        "./sirf/output/plots/",
        "%s.n_samples_all_losses%s" % (time_stamp, "" if not logspace else "_logspace"),
        d_params, ".pdf")
    plt.savefig(fig_path)
    plt.clf()

    plot_important_losses(md, sd, losses, methods, d_group_samples, d_params)
    fig_path = out_string("./sirf/output/plots/",
                          "%s.n_samples_performance" % time_stamp,
                          d_params, ".pdf")
    plt.savefig(fig_path)
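# Hedged sketch of the out_string helper used above. This is a hypothetical
# reconstruction, not the actual sirf implementation (which lives elsewhere
# in the package): it joins a directory, a root name, sorted key=value
# parameter pairs, and an extension into a single output path, which matches
# how the call sites here pass (directory, root, d_params, extension).
def _out_string_sketch(directory, root, d_params, extension):
    # e.g. _out_string_sketch('./sirf/output/plots/', 'foo', {'k': 36}, '.pdf')
    # -> './sirf/output/plots/foo.k=36.pdf'
    parts = ['%s=%s' % (key, d_params[key]) for key in sorted(d_params)]
    return '%s%s.%s%s' % (directory, root, '.'.join(parts), extension)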
def main(workers = 0,
         k = 36,
         encoding = 'tile',
         env_size = 9,
         n_runs = 1,
         n_reward_samples = 2500,
         n_reward_runs = 25,
         lam = 0.,
         gam = 0.995,
         beta = 0.995,
         alpha = 1.,
         eta = 0.5,
         eps = 1e-5,
         patience = 16,
         max_iter = 8,
         l1theta = None,
         l1code = None,
         l2code = None,
         n_samples = None,
         nonlin = None,
         nonzero = None,
         training_methods = None,
         min_imp = 0.0001,
         min_delta = 1e-6,
         fldir = '/scratch/cluster/ccor/feature-learning/',
         req_rew = False,
         record_runs = False,
         ):

    if n_samples:
        n_samples = map(int, n_samples.split(','))

    beta_ratio = beta / gam

    # append reward to basis when using perfect info?
    if training_methods is None:
        training_methods = [
            #(['covariance', 'prediction', 'value_prediction', 'bellman'],
            # [['theta-all'], ['theta-all'], ['theta-all'], ['theta-all', 'w']]),
            #(['prediction', 'value_prediction', 'bellman'],
            # [['theta-all'], ['theta-all'], ['theta-all', 'w']]),
            #(['value_prediction'], [['theta-all']]),
            #(['value_prediction', 'bellman'], [['theta-all'], ['theta-all', 'w']]),
            #(['prediction'], [['theta-all']]),
            #(['prediction', 'bellman'], [['theta-all'], ['theta-all', 'w']]),
            #(['covariance'], [['theta-all']]),
            (['prediction', 'bellman'], [['theta-all'], ['theta-all', 'w']]),
            (['bellman', 'prediction', 'bellman'],
             [['theta-all', 'w'], ['theta-all'], ['theta-all', 'w']]),
            (['full_covariance', 'bellman'], [['theta-all'], ['theta-all', 'w']]),
            (['covariance', 'bellman'], [['theta-all'], ['theta-all', 'w']]),
            (['full_laplacian'], [['theta-all', 'w']]),
            (['laplacian'], [['theta-all', 'w']]),
            (['bellman'], [['theta-all', 'w']]),  # baseline
            ]

    losses = ['sample-reward', 'test-lsbellman', 'test-bellman', 'test-reward',
              'test-model', 'test-fullmodel',  # test-training
              'true-policy', 'true-policy-uniform', 'true-bellman',
              'true-lsbellman', 'true-reward', 'true-model', 'true-fullmodel',
              'true-lsq'] \
        if n_samples else \
             ['sample-reward', 'true-policy-uniform', 'true-policy',
              'true-bellman', 'true-lsbellman', 'true-reward', 'true-model',
              'true-fullmodel', 'true-lsq']

    logger.info('building environment of size %i' % env_size)
    mdp = grid_world.MDP(walls_on = True, size = env_size)
    env = mdp.env
    n_states = env_size**2
    m = Model(mdp.R, mdp.P, gam = gam)

    # create raw data encoder (constant appended in encoder by default)
    if encoding == 'tabular':
        encoder = TabularFeatures(env_size, append_const = True)
    elif encoding == 'tile':
        encoder = TileFeatures(env_size, append_const = True)
    elif encoding == 'factored':
        raise NotImplementedError
    else:
        raise ValueError('unknown encoding: %s' % encoding)

    def sample(n):
        logger.info('sampling from a grid world')  # currently defaults to on-policy sampling
        n_extra = calc_discount_horizon(lam, gam, eps) - 1  # mdp returns n+1 states and n rewards
        kw = dict(n_samples = n + n_extra, encoder = encoder, req_rew = req_rew)
        R, X, _ = mdp.sample_encoding(**kw)

        if req_rew:
            logger.info('reward required')
            assert R.sum() > 0
        logger.info('reward sum: %.2f' % R.sum())

        R_val, X_val, _ = mdp.sample_encoding(**kw)
        R_test, X_test, _ = mdp.sample_encoding(**kw)
        #losses = ['test-bellman', 'test-reward', 'test-model',
        #          'true-bellman', 'true-reward', 'true-model', 'true-lsq']  # test-training
        weighting = 'policy'
        return (X, X_val, X_test), (R, R_val, R_test), weighting

    def full_info():
        logger.info('using perfect information')

        # gen stacked matrices of I, P, P^2, ...
        R = numpy.array([])
        S = sp.eye(n_states, n_states)
        P = sp.eye(n_states, n_states)
        for i in xrange(calc_discount_horizon(lam, gam, eps)):  # horizon set by eps cutoff
            R = numpy.append(R, P * m.R)
            P = m.P * P
            S = sp.vstack((S, P))

        X = encoder.encode(S)
        R = sp.csr_matrix(R[:, None])
        X_val = X_test = X
        R_val = R_test = R
        #losses = ['true-bellman', 'true-reward', 'true-model']
        weighting = 'uniform'
        return (X, X_val, X_test), (R, R_val, R_test), weighting

    reg = None
    if l1theta is not None:
        reg = ('l1theta', l1theta)
    if l1code is not None:
        reg = ('l1code', l1code)
    if l2code is not None:
        reg = ('l2code', l2code)

    run_param_keys = ['k', 'method', 'encoding', 'samples',
                      'reward_samples', 'reward_runs',
                      'size', 'weighting', 'lambda', 'gamma',
                      'alpha', 'eta', 'regularization', 'nonlinear']

    def yield_jobs():
        for i, n in enumerate(n_samples or [n_states]):
            logger.info('creating job with %i samples/states' % n)

            # build bellman operator matrices
            logger.info('making mixing matrices')
            Mphi, Mrew = BellmanBasis.get_mixing_matrices(
                n, lam, gam, sampled = bool(n_samples), eps = eps)

            for r in xrange(n_runs):
                n_features = encoder.n_features

                # initialize parameters
                theta_init = numpy.random.standard_normal((n_features, k))
                theta_init /= numpy.sqrt((theta_init * theta_init).sum(axis=0))
                w_init = numpy.random.standard_normal((k + 1, 1))
                w_init = w_init / numpy.linalg.norm(w_init)

                # sample or gather full-info data
                X_data, R_data, weighting = sample(n) if n_samples else full_info()

                bb_params = [n_features, [k], beta_ratio]
                bb_dict = dict(alpha = alpha, reg_tuple = reg, nonlin = nonlin,
                               nonzero = nonzero, thetas = [theta_init])

                for j, tm in enumerate(training_methods):
                    loss_list, wrt_list = tm
                    assert len(loss_list) == len(wrt_list)

                    run_param_values = [k, tm, encoder, n,
                                        n_reward_samples, n_reward_runs,
                                        env_size, weighting, lam, gam,
                                        alpha, eta,
                                        reg[0] + str(reg[1]) if reg else 'None',
                                        nonlin if nonlin else 'None']
                    d_run_params = dict(izip(run_param_keys, run_param_values))

                    yield (train_basis,
                           [d_run_params, bb_params, bb_dict, env, m, losses,  # environment, model and loss list
                            X_data, R_data, Mphi, Mrew,  # training data
                            max_iter, patience, min_imp, min_delta,  # optimization params
                            fldir, record_runs])  # recording params

    # create output file path
    date_str = time.strftime('%y%m%d.%X').replace(':', '')
    out_dir = fldir + 'sirf/output/csv/'
    root = '%s.%s_results' % (date_str, 'n_samples' if n_samples else 'full_info')
    d_experiment_params = dict(izip(
        ['k', 'encoding', 'size', 'lambda', 'gamma', 'alpha', 'regularization', 'nl'],
        [k, encoder, env_size, lam, gam, alpha,
         reg[0] + str(reg[1]) if reg else 'None',
         nonlin if nonlin else 'None']))
    save_path = out_string(out_dir, root, d_experiment_params, '.csv.gz')
    logger.info('saving results to %s' % save_path)

    # get column title list: ordered params | losses, using dummy dicts
    d_param = dict(izip(run_param_keys, numpy.zeros(len(run_param_keys))))
    d_loss = dict(izip(losses, numpy.zeros(len(losses))))
    col_keys_array, _ = reorder_columns(d_param, d_loss)

    with openz(save_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(col_keys_array)
        for (_, out) in condor.do(yield_jobs(), workers):
            keys, vals = out
            assert (keys == col_keys_array).all()  # todo catch
            writer.writerow(vals)
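# Hedged sketch of calc_discount_horizon, which is called in sample() and
# full_info() above but defined elsewhere in sirf. This is an assumption, not
# the actual implementation: it reads it as the smallest number of steps n at
# which the geometric trace weight (lam * gam)**n falls below eps, so that the
# truncated lambda-return stops contributing meaningfully. For lam = 0 this
# gives a one-step (TD(0)-style) horizon, consistent with n_extra = 0 in
# sample(). numpy is assumed imported at module top, as elsewhere in this file.
def _calc_discount_horizon_sketch(lam, gam, eps):
    if lam * gam == 0.:
        return 1
    return int(numpy.ceil(numpy.log(eps) / numpy.log(lam * gam)))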
def experiment(workers = 80,
               n_runs = 9,
               k = 16,
               env_size = 15,
               gam = 0.998,
               lam = 0.,
               eps = 1e-5,
               partition = None,
               patience = 8,
               max_iter = 8,
               weighting = 'uniform',
               reward_init = False,
               nonlin = 1e-8,
               n_samples = None,
               beta_ratio = 1.,
               training_methods = None):

    if training_methods is None:
        # note: each loss string needs a corresponding wrt list
        if reward_init:
            training_methods = [
                (['prediction'], [['theta-model']]),
                (['prediction', 'layered'], [['theta-model'], ['theta-model', 'w']]),
                (['covariance'], [['theta-model']]),  # with reward, without fine-tuning
                (['covariance', 'layered'], [['theta-model'], ['theta-model', 'w']]),  # theta-model here for 2nd wrt?
                (['layered'], [['theta-all', 'w']])]  # baseline
        else:
            training_methods = [
                (['prediction'], [['theta-all']]),
                (['prediction', 'layered'], [['theta-all'], ['theta-all', 'w']]),
                (['covariance'], [['theta-all']]),  # with reward, without fine-tuning
                (['covariance', 'layered'], [['theta-all'], ['theta-all', 'w']]),  # theta-model here for 2nd wrt?
                (['layered'], [['theta-all', 'w']])]  # baseline

    theano.gof.compilelock.set_lock_status(False)
    theano.config.on_unused_input = 'ignore'
    theano.config.warn.sum_div_dimshuffle_bug = False

    if n_samples is None:
        #n_samples = [100, 500]
        n_samples = numpy.round(numpy.linspace(50, 1500, 6)).astype(int)

    if partition is None:
        partition = {'theta-model': k - 1, 'theta-reward': 1}

    mdp = grid_world.MDP(walls_on = True, size = env_size)
    m = Model(mdp.env.R, mdp.env.P, gam = gam)
    dim = env_size**2

    # tracked losses
    losses = ['test-bellman', 'test-reward', 'test-model', 'true-bellman', 'true-lsq']
    logger.info('losses tracked: ' + str(losses))

    #n_extra = bb._calc_n_steps(lam, gam, eps)
    #print 'n extra samples needed: ', n_extra

    d_loss_data = {}
    for key in losses:
        d_loss_data[key] = numpy.zeros((len(n_samples), n_runs, len(training_methods)))

    def yield_jobs():
        for i, n in enumerate(n_samples):
            Mphi, Mrew = BellmanBasis.get_mixing_matrices(n, lam, gam,
                                                          sampled = True, eps = eps)

            for r in xrange(n_runs):
                # initialize features with unit norm
                theta_init = numpy.random.standard_normal((dim + 1, k))
                if reward_init:
                    theta_init[:-1, -1] = m.R  # XXX set last column to reward
                    theta_init[-1, -1] = 0
                theta_init /= numpy.sqrt((theta_init * theta_init).sum(axis=0))

                w_init = numpy.random.standard_normal((k + 1, 1))
                w_init = w_init / numpy.linalg.norm(w_init)

                # sample data: training, validation, and test sets
                S, Sp, R, _ = mdp.sample_grid_world(n, distribution = weighting)
                S = scipy.sparse.vstack((S, Sp[-1, :]))  # sparse, matching val/test below
                S_val, Sp_val, R_val, _ = mdp.sample_grid_world(n, distribution = weighting)
                S_val = scipy.sparse.vstack((S_val, Sp_val[-1, :]))
                S_test, Sp_test, R_test, _ = mdp.sample_grid_world(n, distribution = weighting)
                S_test = scipy.sparse.vstack((S_test, Sp_test[-1, :]))

                bb = BellmanBasis(dim + 1, k, beta_ratio, partition = partition,
                                  theta = theta_init, w = w_init,
                                  record_loss = losses, nonlin = nonlin)

                for j, tm in enumerate(training_methods):
                    yield (condor_job,
                           [(i, r, j), bb, m, tm,
                            S, R, S_val, R_val, S_test, R_test,
                            Mphi, Mrew, patience, max_iter, weighting])

    # aggregate the condor data
    for (_, result) in condor.do(yield_jobs(), workers):
        d_batch_loss, ind_tuple = result
        for name in d_batch_loss.keys():
            d_loss_data[name][ind_tuple] = d_batch_loss[name]

    # save results!
    pi_root = 'n_samples_results_rinit' if reward_init else 'n_samples_results'
    out_path = os.getcwd() + '/sirf/output/pickle/%s.no_r.k=%i.l=%s.g=%s.%s.size=%i.r=%i.pickle.gz' \
        % (pi_root, k, str(lam), str(gam), weighting, env_size, n_runs)
    logger.info('saving results to %s' % out_path)
    with util.openz(out_path, "wb") as out_file:
        pickle.dump(d_loss_data, out_file, protocol = -1)

    x = numpy.array(n_samples, dtype = numpy.float64)  #range(len(n_samples))

    f = plt.figure()
    logger.info('plotting')
    plot_styles = ['r-', 'b-', 'g-', 'k-', 'c-', 'm-']
    for i, (key, mat) in enumerate(d_loss_data.items()):
        ax = f.add_subplot(2, 3, i + 1)  # todo: generalize for arbitrary length

        for h, tm in enumerate(training_methods):
            std = numpy.std(mat[:, :, h], axis=1)
            mn = numpy.mean(mat[:, :, h], axis=1)
            if 'test' in key:
                mn = mn / x
                std = std / x
            ax.fill_between(x, mn - std, mn + std, facecolor = 'yellow', alpha = 0.15)
            ax.plot(x, mn, plot_styles[h], label = str(tm[0]))

        plt.title(key)
        #plt.axis('off')
        #plt.legend(loc = 3)  # lower left

    pl_root = 'n_samples_rinit' if reward_init else 'n_samples'
    plt.savefig(os.getcwd() + '/sirf/output/plots/%s.n=%i-%i.k=%i.l=%s.g=%s.%s.size=%i.r=%i.pdf'
                % (pl_root, n_samples[0], n_samples[-1], k, str(lam), str(gam),
                   weighting, env_size, n_runs))
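# Hedged usage sketch (illustrative only, not in the original source): reload
# the pickled d_loss_data dict written by experiment() for offline inspection
# or re-plotting. Each value is an array of shape
# (len(n_samples), n_runs, len(training_methods)), so for example
# _load_loss_data_sketch(out_path)['true-bellman'][:, :, 0] gives the true
# Bellman error of the first training method across all sample sizes and runs.
def _load_loss_data_sketch(path):
    with util.openz(path) as pkl_file:
        return pickle.load(pkl_file)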