def run_experiment(args):
    """Train a ReLU MLP value function on a square gridworld and collect metrics.

    The function reads the following attributes from ``args``: ``n``,
    ``gamma``, ``seed``, ``width``, ``depth``, ``online``, ``k``,
    ``stepsize``, ``steps``, ``log_idx``, ``plot_start`` and ``plot_step``.

    ``args.n`` must be a perfect square: the grid side is ``sqrt(args.n)`` and
    the feature matrix has one row per grid cell, so any other value would
    make the feature columns disagree in length.

    Returns:
        A 5-tuple ``(mlp_V_to_V_star, mlp_V_to_origin, condition_numbers,
        bound, smoothness)`` where

        * ``mlp_V_to_V_star`` is ``utils.dist_mu(env.mu, env.V_star, Vs)``,
        * ``mlp_V_to_origin`` is ``utils.mu_norm(env.mu, Vs)``,
        * ``condition_numbers`` holds ``utils.jac_cond`` of the model Jacobian
          at every ``plot_step``-th logged parameter set, starting at
          ``plot_start``,
        * ``bound`` is ``utils.overparam_cond_number_bound(...)``,
        * ``smoothness`` is the largest absolute difference between
          ``env.V_star[i]`` and ``env.V_star[i - 1]`` (index 0 is compared
          with the last entry).

    Raises:
        ValueError: if ``args.n`` is not a perfect square.
    """
    grid_n = int(np.sqrt(args.n))
    # Fail early with a clear message instead of a late shape mismatch inside
    # np.concatenate when the truncated square root does not reproduce n.
    if grid_n * grid_n != args.n:
        raise ValueError(
            "args.n must be a perfect square for a square gridworld, "
            "got {!r}".format(args.n)
        )

    P = P_matrices.constant_gridworld(grid_n, 0.3, 0.1, 0.1, 0.3, 0.2)
    R_mat = np.zeros_like(P)
    R_mat[:, -1] = 1  # reward entry set for every row of the last column
    env = environment.MRP(args.gamma, P, R_mat)

    # Build features: two grid-coordinate columns plus a constant column.
    # First column is 0,0,...,1,1,...; second column is 0,1,...,0,1,...
    first_coord = np.repeat(np.arange(grid_n), grid_n)
    second_coord = np.tile(np.arange(grid_n), grid_n)
    Phi = np.stack([first_coord, second_coord], axis=1)
    Phi = np.concatenate([Phi, np.ones((args.n, 1))], axis=1)

    # Seed immediately before constructing the network, as in the original
    # statement order.
    np.random.seed(args.seed)
    V = mlp.MLP(Phi, [args.width] * args.depth, biases=False, activation='ReLU')

    if args.online:
        Vs, thetas, _, _ = online_td0.TD0(
            V, env, args.stepsize, args.steps, args.log_idx, args.plot_step)
    else:
        thetas, Vs = expected_tdk.TDk(
            args.k, V, env, args.stepsize, args.steps, args.log_idx,
            args.plot_step)

    mlp_V_to_V_star = utils.dist_mu(env.mu, env.V_star, jnp.array(Vs))
    mlp_V_to_origin = utils.mu_norm(env.mu, jnp.array(Vs))

    # Re-load each sampled parameter snapshot into the model to evaluate the
    # conditioning of its Jacobian at that point of training.
    condition_numbers = []
    for theta in thetas[args.plot_start::args.plot_step]:
        V.theta = theta
        condition_numbers.append(utils.jac_cond(V.jacobian()))

    bound = utils.overparam_cond_number_bound(env.P, env.mu, env.gamma, args.k)
    # i = 0 compares V_star[0] with V_star[-1], i.e. the comparison wraps.
    smoothness = max(
        abs(env.V_star[i] - env.V_star[i - 1]) for i in range(args.n))
    return mlp_V_to_V_star, mlp_V_to_origin, condition_numbers, bound, smoothness
def run_experiment(args):
    """Run online TD(0) and expected TD(k) with identically seeded MLPs.

    The environment is ``environment.MRP`` over the matrix returned by
    ``P_matrices.build_cyclic_P(args.n, args.delta)`` with a reward matrix
    that is zero except for entry ``[0, 1]``. Features are
    ``(sin, cos, 1)`` of ``args.n`` evenly spaced angles in ``[0, 2*pi)``.

    Returns:
        ``(online_mlp_V_to_V_star, online_mlp_V_to_origin, mlp_V_to_V_star,
        mlp_V_to_origin, condition_numbers, online_condition_numbers)``.
        Note the ordering: the expected-update condition numbers come before
        the online ones.
    """
    transition = P_matrices.build_cyclic_P(args.n, args.delta)
    rewards = np.zeros_like(transition)
    rewards[0, 1] = 1
    env = environment.MRP(args.gamma, transition, rewards)
    # Disabled alternative environment (kept for reference):
    #   V_star = np.zeros(args.n); V_star[i] = 1 for i in range(0, args.n, 2)
    #   env = environment.MRP(args.gamma, P, V_star=V_star)

    # Build features.
    circle = np.linspace(0, 2 * np.pi, args.n, endpoint=False)
    features = np.concatenate(
        [
            np.sin(circle)[:, np.newaxis],
            np.cos(circle)[:, np.newaxis],
            np.ones((args.n, 1)),
        ],
        axis=1,
    )

    def seeded_mlp():
        # Re-seeding before each construction gives both runs the same
        # random state at model-creation time.
        np.random.seed(args.seed)
        return mlp.MLP(features, [args.width] * args.depth, biases=False)

    def jacobian_conditioning(model, history):
        # Load every sampled snapshot into the model and score its Jacobian.
        scores = []
        for snapshot in history[args.plot_start::args.plot_step]:
            model.theta = snapshot
            scores.append(utils.jac_cond(model.jacobian()))
        return scores

    # --- online TD(0) ---
    online_model = seeded_mlp()
    online_values, online_history, _, _ = online_td0.TD0(
        online_model, env, args.stepsize, args.steps, args.log_idx,
        args.plot_step)
    online_mlp_V_to_V_star = utils.dist_mu(
        env.mu, env.V_star, jnp.array(online_values))
    online_mlp_V_to_origin = utils.mu_norm(env.mu, jnp.array(online_values))
    online_condition_numbers = jacobian_conditioning(
        online_model, online_history)

    # --- expected TD(k) ---
    expected_model = seeded_mlp()
    expected_history, expected_values = expected_tdk.TDk(
        args.k, expected_model, env, args.stepsize, args.steps, args.log_idx,
        args.plot_step)
    mlp_V_to_V_star = utils.dist_mu(
        env.mu, env.V_star, jnp.array(expected_values))
    mlp_V_to_origin = utils.mu_norm(env.mu, jnp.array(expected_values))
    condition_numbers = jacobian_conditioning(expected_model, expected_history)

    # Disabled outputs (kept for reference):
    #   bound = utils.overparam_cond_number_bound(env.P, env.mu, env.gamma, args.k)
    #   smoothness = max([abs(env.V_star[i] - env.V_star[i-1]) for i in range(args.n)])
    return (
        online_mlp_V_to_V_star,
        online_mlp_V_to_origin,
        mlp_V_to_V_star,
        mlp_V_to_origin,
        condition_numbers,
        online_condition_numbers,
    )
def run_experiment(args):
    """Train a ReLU MLP and record distance, dynamics norm and parameter norm.

    The environment is ``environment.MRP`` over
    ``P_matrices.build_cyclic_P(args.n, args.delta)`` with a reward matrix
    that is zero except for entry ``[0, 1]``. Training uses
    ``online_td0.TD0`` when ``args.online`` is truthy and
    ``expected_tdk.TDk`` otherwise.

    Returns:
        ``(tanh_mlp_V_to_V_star, tanh_dynamics, tanh_params,
        mlp_V_to_V_star, dynamics, params, bound)``. The three ``tanh_*``
        entries are always empty lists because the tanh run is disabled.
    """
    transition = P_matrices.build_cyclic_P(args.n, args.delta)
    reward = np.zeros_like(transition)
    reward[0, 1] = 1
    env = environment.MRP(args.gamma, transition, reward)

    # Build features: (sin, cos, 1) of evenly spaced angles.
    phase = np.linspace(0, 2 * np.pi, args.n, endpoint=False)
    sin_col = np.expand_dims(np.sin(phase), 1)
    cos_col = np.expand_dims(np.cos(phase), 1)
    bias_col = np.ones((args.n, 1))
    Phi = np.concatenate([sin_col, cos_col, bias_col], axis=1)

    # The tanh-activation run is disabled. It mirrored the ReLU run below
    # with activation='tanh' and filled the three lists that are now returned
    # empty.
    # NOTE(review): the disabled variant called utils.dynamics_norm while the
    # live path calls utils.norm_dynamics -- confirm which name exists before
    # re-enabling it.
    tanh_params = []
    tanh_dynamics = []
    tanh_mlp_V_to_V_star = []

    np.random.seed(args.seed)
    model = mlp.MLP(
        Phi, [args.width] * args.depth, biases=False, activation='ReLU')
    if args.online:
        values, history, _, _ = online_td0.TD0(
            model, env, args.stepsize, args.steps, args.log_idx,
            args.plot_step)
    else:
        history, values = expected_tdk.TDk(
            args.k, model, env, args.stepsize, args.steps, args.log_idx,
            args.plot_step)
    mlp_V_to_V_star = utils.dist_mu(env.mu, env.V_star, jnp.array(values))

    snapshots = history[args.plot_start::args.plot_step]

    # Dynamics norm at each sampled snapshot (requires loading it into model).
    dynamics = []
    for snapshot in snapshots:
        model.theta = snapshot
        dynamics.append(utils.norm_dynamics(model, env.A, env.V_star))

    # Norm of all parameters of a snapshot flattened into one vector.
    params = []
    for snapshot in snapshots:
        flat = np.concatenate([part.flatten() for part in snapshot]).ravel()
        params.append(np.linalg.norm(flat))

    bound = utils.overparam_cond_number_bound(env.P, env.mu, env.gamma, args.k)
    return (
        tanh_mlp_V_to_V_star,
        tanh_dynamics,
        tanh_params,
        mlp_V_to_V_star,
        dynamics,
        params,
        bound,
    )
def run_experiment(args):
    """Compare tabular, MLP and linear value functions under expected TD(k).

    The environment is ``environment.MRP`` over ``build_P(args.n,
    args.delta)`` with a prescribed ``V_star`` that is 1 at even indices and
    0 elsewhere. All three models are trained with ``expected_tdk.TDk``.

    Returns:
        ``(tabular_Vs, linear_Vs, mlp_Vs, condition_numbers, bound,
        smoothness)``. The ``*_Vs`` entries are ``utils.dist_mu`` applied to
        the logged values sampled at ``plot_start::plot_step``;
        ``condition_numbers`` come from the MLP run only.
    """
    np.random.seed(args.seed)
    transition = build_P(args.n, args.delta)
    # Disabled alternative environment (kept for reference):
    #   R_mat = np.zeros_like(P); R_mat[1, 0] = 1
    #   env = environment.MRP(args.gamma, P, R_mat)
    target = np.zeros(args.n)
    target[::2] = 1  # ones at indices 0, 2, 4, ...
    env = environment.MRP(args.gamma, transition, V_star=target)
    bound = utils.overparam_cond_number_bound(env.P, env.mu, env.gamma, args.k)

    circle = np.linspace(0, 2 * np.pi, args.n, endpoint=False)
    Phi = np.concatenate(
        [
            np.expand_dims(np.sin(circle), 1),
            np.expand_dims(np.cos(circle), 1),
            np.ones((args.n, 1)),
        ],
        axis=1,
    )
    sampled = slice(args.plot_start, None, args.plot_step)

    # Tabular model.
    model = simple.Tabular(np.zeros(args.n))
    _, values = expected_tdk.TDk(
        args.k, model, env, args.stepsize, args.steps, args.log_idx)
    tabular_Vs = utils.dist_mu(env, np.array(values)[sampled])

    # MLP model; also track the conditioning of its Jacobian over training.
    model = mlp.MLP(Phi, [args.width] * args.depth, biases=False)
    history, values = expected_tdk.TDk(
        args.k, model, env, args.stepsize, args.steps, args.log_idx)
    mlp_Vs = utils.dist_mu(env, np.array(values)[sampled])
    condition_numbers = []
    for snapshot in history[sampled]:
        model.theta = snapshot
        condition_numbers.append(utils.jac_cond(model.jacobian()))

    # Linear model on the same features.
    model = simple.Linear(np.zeros(Phi.shape[1]), Phi)
    _, values = expected_tdk.TDk(
        args.k, model, env, args.stepsize, args.steps, args.log_idx)
    linear_Vs = utils.dist_mu(env, np.array(values)[sampled])

    # Largest jump between neighbouring entries of V_star; index 0 is
    # compared with the last entry.
    jumps = [abs(env.V_star[j] - env.V_star[j - 1]) for j in range(args.n)]
    smoothness = max(jumps)
    return tabular_Vs, linear_Vs, mlp_Vs, condition_numbers, bound, smoothness
def run_experiment(args):
    """Train a Spiral value function, then a Tabular one from the same start.

    The environment is ``environment.MRP`` over
    ``P_matrices.build_cyclic_P(args.n, args.delta)`` with an all-zero reward
    matrix. Training uses ``online_td0.TD0`` when ``args.online`` is truthy
    and ``numpy_expected_tdk.TDk`` otherwise. The Tabular model is
    initialised with the first logged value vector of the Spiral run.

    Side effects:
        Prints the orientation vector handed to ``spiral.Spiral``.

    Returns:
        ``(tabular_Vs, spiral_Vs, condition_numbers, bound, smoothness)``;
        ``condition_numbers`` come from the Spiral run only.
    """
    np.random.seed(args.seed)
    transition = P_matrices.build_cyclic_P(args.n, args.delta)
    env = environment.MRP(args.gamma, transition, np.zeros_like(transition))
    bound = utils.overparam_cond_number_bound(env.P, env.mu, env.gamma, args.k)

    # Orientation vector: -10 everywhere except the last entry, 10 * (n - 1).
    orientation = np.full(args.n, -1.0)
    orientation[-1] = args.n - 1
    orientation = 10.0 * orientation
    init_conditions = -20.0
    print(orientation)
    epsilon = 0.05
    # The Spiral is parameterised with a matrix built with a fixed 0.5
    # instead of args.delta.
    spiral_P = P_matrices.build_cyclic_P(args.n, 0.5)

    def train(model):
        # Dispatch to the selected algorithm; return (thetas, Vs) either way.
        if args.online:
            values, history, _, _ = online_td0.TD0(
                model, env, args.stepsize, args.steps, args.log_idx,
                args.plot_step)
        else:
            history, values = numpy_expected_tdk.TDk(
                args.k, model, env, args.stepsize, args.steps, args.log_idx,
                args.plot_step)
        return history, values

    # --- Spiral run ---
    model = spiral.Spiral(init_conditions, spiral_P, orientation, epsilon)
    spiral_history, spiral_values = train(model)
    spiral_Vs = utils.dist_mu(env.mu, env.V_star, np.array(spiral_values))
    condition_numbers = []
    for snapshot in spiral_history[args.plot_start::args.plot_step]:
        model.theta = snapshot
        condition_numbers.append(utils.jac_cond(model.jacobian()))

    # --- Tabular run, started from the Spiral's first logged values ---
    model = simple.Tabular(spiral_values[0])
    _, tabular_values = train(model)
    tabular_Vs = utils.dist_mu(env.mu, env.V_star, np.array(tabular_values))

    # Index 0 is compared with the last entry of V_star.
    jumps = [abs(env.V_star[j] - env.V_star[j - 1]) for j in range(args.n)]
    smoothness = max(jumps)
    return tabular_Vs, spiral_Vs, condition_numbers, bound, smoothness
def run_experiment(args):
    """Train an MLP value function against a prescribed ``V_star``.

    With ``args.hard`` truthy the target is 1 at even indices and 0
    elsewhere. Otherwise the target is the ``V_star`` of an auxiliary
    ``environment.MRP`` built from ``P_matrices.build_cyclic_P(args.n, 0.5)``
    and a reward matrix that is zero except for entry ``[0, 1]``, so the
    target does not depend on ``args.delta``. In both cases the experiment
    environment uses ``P_matrices.build_cyclic_P(args.n, args.delta)``.

    Returns:
        ``(tabular_Vs, linear_Vs, mlp_Vs, condition_numbers, bound,
        smoothness)``. ``tabular_Vs`` and ``linear_Vs`` are always empty
        lists because those runs are disabled.
    """
    np.random.seed(args.seed)
    transition = P_matrices.build_cyclic_P(args.n, args.delta)

    if args.hard:
        target = np.zeros(args.n)
        target[::2] = 1  # ones at indices 0, 2, 4, ...
    else:
        reward = np.zeros_like(transition)
        reward[0, 1] = 1
        # Disabled alternative: env = environment.MRP(args.gamma, P, R_mat=R_mat)
        # Fixed V_star: derived from a matrix built with 0.5, not args.delta.
        reference_P = P_matrices.build_cyclic_P(args.n, 0.5)
        target = environment.MRP(args.gamma, reference_P, reward).V_star
    env = environment.MRP(args.gamma, transition, V_star=target)

    bound = utils.overparam_cond_number_bound(env.P, env.mu, env.gamma, args.k)

    circle = np.linspace(0, 2 * np.pi, args.n, endpoint=False)
    Phi = np.concatenate(
        [
            np.expand_dims(np.sin(circle), 1),
            np.expand_dims(np.cos(circle), 1),
            np.ones((args.n, 1)),
        ],
        axis=1,
    )

    # Tabular run disabled: it trained simple.Tabular(np.zeros(args.n)) with
    # the same online/expected dispatch as below and stored
    # utils.dist_mu(env.mu, env.V_star, np.array(Vs)).
    tabular_Vs = []

    model = mlp.MLP(Phi, [args.width] * args.depth, biases=False)
    if args.online:
        values, history, _, _ = online_td0.TD0(
            model, env, args.stepsize, args.steps, args.log_idx,
            args.plot_step)
    else:
        history, values = expected_tdk.TDk(
            args.k, model, env, args.stepsize, args.steps, args.log_idx,
            args.plot_step)
    mlp_Vs = utils.dist_mu(env.mu, env.V_star, np.array(values))

    # Conditioning of the model Jacobian at each sampled parameter snapshot.
    condition_numbers = []
    for snapshot in history[args.plot_start::args.plot_step]:
        model.theta = snapshot
        condition_numbers.append(utils.jac_cond(model.jacobian()))

    # Linear run disabled: it trained
    # simple.Linear(np.zeros(Phi.shape[1]), Phi) with the same dispatch and
    # stored utils.dist_mu(env.mu, env.V_star, np.array(Vs)).
    linear_Vs = []

    # Index 0 is compared with the last entry of V_star.
    jumps = [abs(env.V_star[j] - env.V_star[j - 1]) for j in range(args.n)]
    smoothness = max(jumps)
    return tabular_Vs, linear_Vs, mlp_Vs, condition_numbers, bound, smoothness
# Script: run expected TD(0) on a 3-state MRP, first with a Spiral value
# function (its logged values are drawn as a 3D curve), then with a Tabular
# value function started from the Spiral's first logged values.
# NOTE(review): two_layers and online_td0 are not referenced in the visible
# part of this script -- confirm whether later code uses them.
from td.funcs import spiral, simple, two_layers
from td.envs import environment
from td.algs import expected_td0, online_td0
import numpy as np
from matplotlib import pyplot as plt
# NOTE(review): Axes3D is not referenced by name below; presumably imported
# for its side effect of registering the '3d' projection on older matplotlib
# versions -- confirm before removing.
from mpl_toolkits.mplot3d import Axes3D

# 3x3 matrix handed to the MRP as P; every row sums to 1.
P = np.array([[0.5, 0, 0.5], [0.5, 0.5, 0], [0, 0.5, 0.5]])
gamma = 0.9
# All-zero reward matrix with the same shape as P.
R_mat = np.zeros_like(P)
env = environment.MRP(gamma, P, R_mat)

# Spiral
V = spiral.Spiral(-7.0, P, np.array([-10, -10, 20]), 0.05)
# presumably the trailing arguments are (stepsize, steps, log interval) --
# TODO confirm against expected_td0.TD0
thetas, Vs = expected_td0.TD0(V, env, 0.00001, 25000, 10000)
Vs = np.array(Vs)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# One curve through the logged values: columns 0, 1, 2 of Vs are the x, y, z
# coordinates.
ax.plot(Vs[:, 0], Vs[:, 1], Vs[:, 2])

# Tabular expected
# Start the tabular model from the first row of the Spiral run's values.
V = simple.Tabular(Vs[0, :])
thetas, Vs = expected_td0.TD0(V, env, 0.1, 1000, 1000)
Vs = np.array(Vs)
# NOTE(review): nothing visible here shows or saves the figure, or plots the
# tabular run -- presumably the script continues past this point.