def train(args, X, Y, dbg_iter=None, dbg_epoch=None, dbg_done=None):
    dbg_out = []
    net_in, net_out = hybrid_network(args.num_inputs, args.num_outputs,
                                     args.num_units, args.num_sto,
                                     dbg_out=dbg_out)
    params, f_step, f_loss, f_grad, f_surr = \
        make_funcs(net_in, net_out, args, dbg_out=dbg_out)
    param_col = ParamCollection(params)
    init_params = nn.init_array(args.init_conf, (param_col.get_total_size(), 1))
    param_col.set_value_flat(init_params.flatten())
    init_params = [
        np.array([[0., 1.]]),    # W_1
        np.array([[0., 0.]]),    # b_1
        np.array([[1.], [1.]]),  # W_3
        np.array([[0.]]),        # b_3
    ]
    param_col.set_values(init_params)
    if 'snapshot' in args:
        print "Loading params from previous snapshot"
        snapshot = pickle.load(open(args['snapshot'], 'r'))
        param_col.set_values(snapshot)
    # param_col.set_value_flat(
    #     np.random.normal(0., 1., size=param_col.get_total_size())
    # )
    # optim_state = Table(theta=param_col.get_value_flat(),
    #                     scratch=param_col.get_value_flat(),
    #                     step_size=args.step_size)
    optim_state = make_rmsprop_state(theta=param_col.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)
    for i_epoch in range(args.n_epochs):
        for i_iter in range(X.shape[0]):
            ind = np.random.choice(X.shape[0], args['size_batch'])
            x, y = X[ind], Y[ind]  # not sure this works for multi-dim
            info = f_surr(x, y, num_samples=args['size_sample'])
            loss, loss_surr, grad = info['loss'], info['surr_loss'], info['surr_grad']
            # loss, loss_surr, grad = f_grad(x, y)
            # update
            rmsprop_update(param_col.flatten_values(grad), optim_state)
            # optim_state.scratch = param_col.flatten_values(grad)
            # optim_state.theta -= optim_state.step_size * optim_state.scratch
            param_col.set_value_flat(optim_state.theta)
            print param_col.get_value_flat()
            if dbg_iter:
                dbg_iter(i_epoch, i_iter, param_col, optim_state, info)
        if dbg_epoch:
            dbg_epoch(i_epoch, param_col, f_surr)
    if dbg_done:
        dbg_done(param_col, optim_state, f_surr)
    return optim_state
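# Illustrative driver (a sketch, not part of the original source): hybrid_network,
# make_funcs, and the init_conf format are assumed to behave as train() above uses
# them, and the tiny AttrDict below only mimics the mixed attribute/item access
# that train() performs on `args`; every numeric value here is made up.
def _train_toy_example():
    class AttrDict(dict):
        __getattr__ = dict.__getitem__

    args = AttrDict(num_inputs=1, num_outputs=1, num_units=[4], num_sto=[1],
                    init_conf=nn.IIDGaussian(std=0.1), step_size=0.01,
                    decay_rate=0.95, n_epochs=2, size_batch=32, size_sample=4)
    X = np.random.uniform(-1., 1., size=(500, 1))
    Y = X ** 2
    return train(args, X, Y)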
class MujocoPolicy(PPOPolicy, Serializable):
    def __init__(self, obs_dim, ctrl_dim):
        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)
        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
        a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid, ctrl_dim, weight_init=nn.IIDGaussian(std=0.01))(h2)

        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]

        logp_n = ((-.5) * cgt.square((a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square((a_na - oldmean_na) / oldstd_na).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)
        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n * adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
        # kl = cgt.log(sigafter/)

        params = nn.get_parameters(surr)

        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) +
              (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
        self._compute_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_na, adv_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])

        self.pc = ParamCollection(params)

    def compute_surr_kl(self, *args):
        return self._compute_surr_kl(*args)

    def compute_grad_lagrangian(self, *args):
        return self._compute_grad_lagrangian(*args)

    def get_stdev(self):
        return np.exp(self.logstd.op.get_value().ravel())

    def step(self, X):
        pdist_na = self.f_pdist(X)
        acts_n = meanstd_sample(pdist_na)
        return {"action": acts_n, "pdist": pdist_na}

    def compute_entropy(self, pdist_np):
        return meanstd_entropy(pdist_np)

    def pdist_ndim(self):
        return 2 * self.ctrl_dim

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self, th):
        return self.pc.set_value_flat(th)
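# Sketch (an assumption about the helper, not its actual source): meanstd_sample,
# called in MujocoPolicy.step above, is expected to draw a ~ N(mean, std^2)
# elementwise, where pdist_na stores the means in the first ctrl_dim columns and
# the stds in the last ctrl_dim columns, matching
# pdists_np = cgt.concatenate([mean_na, std_na], axis=1).
def _meanstd_sample_sketch(pdist_na):
    import numpy as np
    d = pdist_na.shape[1] // 2
    mean, std = pdist_na[:, :d], pdist_na[:, d:]
    return mean + std * np.random.randn(*mean.shape)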
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--n_batches", type=int, default=1000000)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    args = parser.parse_args()
    np.seterr("raise")

    cgt.set_precision("quad" if args.grad_check else "double")
    np.random.seed(0)

    # model parameters
    if args.grad_check:
        opt = NTMOpts(
            b=1,  # batch size
            h=1,  # number of heads
            n=2,  # number of memory sites
            m=3,  # dimension at each memory site
            k=4,  # dimension of input
            p=2,  # dimension of output
            ff_hid_sizes=[])
        seq_length = 2
    else:
        opt = NTMOpts(
            b=64,    # batch size
            h=3,     # number of heads
            n=128,   # number of memory sites
            m=20,    # dimension at each memory site
            k=3,     # dimension of input
            p=1,     # dimension of output
            ff_hid_sizes=[128, 128])
        seq_length = 10

    if args.unittest:
        seq_length = 3
        args.n_batches = 3

    tstart = time.time()
    ntm = make_ntm(opt)
    task = CopyTask(opt.b, seq_length, opt.p)
    f_loss, f_loss_and_grad, params = make_funcs(opt, ntm, task.total_time(), task.loss_timesteps())
    print "graph construction and compilation took %g seconds" % (time.time() - tstart)

    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),)))

    if args.grad_check:
        x, y = task.gen_batch()
        th = pc.get_value_flat()  # current flat parameter vector, used by the check below

        def f(thnew):
            thold = th.copy()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, th, eps=1e-8)
        _, _, g_anal = f_loss_and_grad(x, y)
        assert np.allclose(g_num, g_anal, atol=1e-8)
        print "Gradient check succeeded!"
        print "%i/%i elts of grad are nonzero" % ((g_anal != 0).sum(), g_anal.size)
        return

    seq_num = 0
    state = make_rmsprop_state(pc.get_value_flat(), .01, .95)
    print fmt_row(13, ["seq num", "CE (bits)", "accuracy", "|g|_inf"], header=True)

    if args.profile:
        cgt.profiler.start()

    for i in xrange(args.n_batches):
        x, y = task.gen_batch()
        seq_num += x.shape[1]
        l, l01, g = f_loss_and_grad(x, y)
        print fmt_row(13, [seq_num, l, l01, np.abs(g).max()])
        rmsprop_update(g, state)
        pc.set_value_flat(state.theta)
        if not np.isfinite(l):
            break

    if args.profile:
        cgt.profiler.print_stats()
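# For reference, a central-difference gradient check equivalent in spirit to how
# cgt.numeric_diff.numeric_grad is used above (this sketch is an assumption about
# its behavior, not the actual implementation): perturb one coordinate of the flat
# parameter vector at a time and difference the scalar loss.
def _numeric_grad_sketch(f, th, eps=1e-8):
    import numpy as np
    g = np.zeros_like(th)
    for i in xrange(th.size):
        e = np.zeros_like(th)
        e[i] = eps
        g[i] = (f(th + e) - f(th - e)) / (2 * eps)
    return g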
class AtariRAMPolicy(PPOPolicy, Serializable):
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        nhid, nhid2 = 64, 64
        h0 = (o_no - 128.0) / 128.0
        d0 = nn.dropout(h0, .2)
        h1 = nn.rectify(nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(d0))
        d1 = nn.dropout(h1, .2)
        h2 = nn.rectify(nn.Affine(nhid, nhid2, weight_init=nn.IIDGaussian(std=.1))(d1))
        d2 = nn.dropout(h2, .2)
        probs_na = nn.softmax(nn.Affine(nhid2, n_actions, weight_init=nn.IIDGaussian(std=0.01))(d2))

        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)

    def step(self, X):
        pdist_na = self.f_probs(X)
        acts_n = cat_sample(pdist_na)
        return {"action": acts_n, "pdist": pdist_na}

    def compute_gradient(self, pdist_np, o_no, a_n, q_n):
        return self.f_gradlogp(pdist_np, o_no, a_n, q_n)

    def compute_surr_kl(self, pdist_np, o_no, a_n, q_n):
        return self.f_surr_kl(pdist_np, o_no, a_n, q_n)

    def compute_grad_lagrangian(self, lam, pdist_np, o_no, a_n, q_n):
        return self._f_grad_lagrangian(lam, pdist_np, o_no, a_n, q_n)

    def compute_entropy(self, pdist_np):
        return cat_entropy(pdist_np)

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self, th):
        return self.pc.set_value_flat(th)
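# Sketch (an assumption about the helper, not its actual source): cat_sample, used
# in step() above, presumably draws one action index per row of the probability
# matrix, e.g. by inverse-CDF sampling on each row.
def _cat_sample_sketch(probs_na):
    import numpy as np
    u_n = np.random.uniform(size=probs_na.shape[0])
    cdf_na = np.cumsum(probs_na, axis=1)
    # number of CDF entries below the uniform draw = sampled category index
    return (u_n[:, None] > cdf_na).sum(axis=1)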
def main():
    nr.seed(0)
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int, default=64)
    parser.add_argument("--size_batch", type=int, default=64)
    parser.add_argument("--n_layers", type=int, default=2)
    parser.add_argument("--n_unroll", type=int, default=16)
    parser.add_argument("--k_in", type=int, default=3)
    parser.add_argument("--k_h", type=int, default=5)
    parser.add_argument("--step_size", type=float, default=.01)
    parser.add_argument("--decay_rate", type=float, default=0.95)
    parser.add_argument("--n_epochs", type=int, default=20)
    parser.add_argument("--arch", choices=["lstm", "gru"], default="gru")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")
    assert args.n_unroll > 1

    loader = Loader(args.data_dir, args.size_batch, args.n_unroll, (.8, .1, .1))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(
        args.arch, loader.size_vocab, loader.size_vocab, args.size_mem,
        args.size_batch, args.n_layers, args.n_unroll, args.k_in, args.k_h)

    if args.profile:
        profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-0.01, 0.01, size=(pc.get_total_size(),)))

    # Re-initialize "rotation" parameters as pairs of unit vectors, the second a
    # small perturbation of the first.
    for i, param in enumerate(pc.params):
        if "is_rotation" in param.props:
            shape = pc.get_shapes()[i]
            num_vec = int(shape[0] / 2)
            size_vec = int(shape[1])
            gauss = nr.normal(size=(num_vec * size_vec))
            gauss = np.reshape(gauss, (num_vec, size_vec))
            gauss_mag = norm(gauss, axis=1, keepdims=True)
            gauss_normed = gauss / gauss_mag
            gauss_perturb = nr.normal(scale=0.01, size=(num_vec * size_vec))
            gauss_perturb = np.reshape(gauss_perturb, (num_vec, size_vec))
            second_vec = gauss_normed + gauss_perturb
            second_vec_mag = norm(second_vec, axis=1, keepdims=True)
            second_vec_normed = second_vec / second_vec_mag
            new_param_value = np.zeros(shape)
            for j in xrange(num_vec):
                new_param_value[2 * j, :] = gauss_normed[j, :]
                new_param_value[2 * j + 1, :] = second_vec_normed[j, :]
            param.op.set_value(new_param_value)
            # print new_param_value

    def initialize_hiddens(n):
        return [np.ones((n, args.size_mem), cgt.floatX) / float(args.size_mem)
                for _ in xrange(get_num_hiddens(args.arch, args.n_layers))]

    if args.grad_check:  # if True:
        x, y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        print "Beginning grad check"
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10)
        print "Ending grad check"
        result = f_loss_and_grad(x, y, *prev_hiddens)
        g_anal = result[1]

        diff = g_num - g_anal
        abs_diff = np.abs(diff)
        print np.where(abs_diff > 1e-4)
        print diff[np.where(abs_diff > 1e-4)]
        embed()

        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch", iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x, y) in loader.train_batches_iter():
            out = f_loss_and_grad(x, y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest:
                return
        print "%.3f s/batch. avg loss = %.3f" % ((time() - tstart) / len(losses), np.mean(losses))
        optim_state.step_size *= .98  # pylint: disable=E1101

        sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind,
               n_steps=300, temp=1.0, seed_text="")

    if args.profile:
        profiler.print_stats()
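# Sketch of the RMSProp update assumed by the training loops in this file
# (an illustration consistent with make_rmsprop_state(theta, step_size, decay_rate),
# not necessarily the library's exact code; the `sqgrad` field name is a guess):
# keep a running average of squared gradients and scale the step by its square root.
def _rmsprop_update_sketch(grad, state, eps=1e-8):
    import numpy as np
    state.sqgrad = state.decay_rate * state.sqgrad + (1 - state.decay_rate) * grad ** 2
    state.theta -= state.step_size * grad / (np.sqrt(state.sqgrad) + eps)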
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--n_batches", type=int, default=1000000)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--task", choices=["copy", "reverse_copy", "repeat_copy"], default="copy")
    args = parser.parse_args()
    np.seterr("raise")

    cgt.set_precision("quad" if args.grad_check else "double")
    np.random.seed(0)

    # model parameters
    if args.grad_check:
        opt = NTMOpts(
            b=1,  # batch size
            h=1,  # number of heads
            n=2,  # number of memory sites
            m=3,  # dimension at each memory site
            k=4,  # dimension of input
            p=2,  # dimension of output
            ff_hid_sizes=[])
        seq_length = 2
    else:
        opt = NTMOpts(
            b=64,    # batch size
            h=3,     # number of heads
            n=128,   # number of memory sites
            m=20,    # dimension at each memory site
            k=3,     # dimension of input
            p=1,     # dimension of output
            ff_hid_sizes=[128, 128])
        seq_length = 10

    if args.unittest:
        seq_length = 3
        args.n_batches = 3

    tstart = time.time()
    ntm = make_ntm(opt)

    if args.task == "copy":
        task = CopyTask(opt.b, seq_length, opt.p)
    elif args.task == "reverse_copy":
        task = ReverseCopyTask(opt.b, seq_length, opt.p)
    elif args.task == "repeat_copy":
        n_copies = 4
        task = RepeatCopyTask(opt.b, seq_length, opt.p, n_copies)

    f_loss, f_loss_and_grad, params = make_funcs(opt, ntm, task.total_time(), task.loss_timesteps())
    print "graph construction and compilation took %g seconds" % (time.time() - tstart)

    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),)))

    if args.grad_check:
        x, y = task.gen_batch()
        th = pc.get_value_flat()  # current flat parameter vector, used by the check below

        def f(thnew):
            thold = th.copy()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, th, eps=1e-8)
        _, _, g_anal = f_loss_and_grad(x, y)
        assert np.allclose(g_num, g_anal, atol=1e-8)
        print "Gradient check succeeded!"
        print "%i/%i elts of grad are nonzero" % ((g_anal != 0).sum(), g_anal.size)
        return

    seq_num = 0
    state = make_rmsprop_state(pc.get_value_flat(), .01, .95)
    print fmt_row(13, ["seq num", "CE (bits)", "accuracy", "|g|_inf"], header=True)

    if args.profile:
        cgt.profiler.start()

    for i in xrange(args.n_batches):
        x, y = task.gen_batch()
        seq_num += x.shape[1]
        l, l01, g = f_loss_and_grad(x, y)
        print fmt_row(13, [seq_num, l, l01, np.abs(g).max()])
        rmsprop_update(g, state)
        pc.set_value_flat(state.theta)
        if not np.isfinite(l):
            break

    if args.profile:
        cgt.profiler.print_stats()
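# Example invocations (illustrative only; "ntm_demo.py" is a placeholder for
# whatever script this main() lives in):
#
#   python ntm_demo.py --task copy
#   python ntm_demo.py --task repeat_copy --n_batches 20000
#   python ntm_demo.py --grad_check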
def main():
    nr.seed(0)
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int, default=64)
    parser.add_argument("--size_batch", type=int, default=64)
    parser.add_argument("--n_layers", type=int, default=2)
    parser.add_argument("--n_unroll", type=int, default=16)
    parser.add_argument("--step_size", type=float, default=.01)
    parser.add_argument("--decay_rate", type=float, default=0.95)
    parser.add_argument("--n_epochs", type=int, default=20)
    parser.add_argument("--arch", choices=["lstm", "gru"], default="lstm")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")
    assert args.n_unroll > 1

    loader = Loader(args.data_dir, args.size_batch, args.n_unroll, (.8, .1, .1))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(
        args.arch, loader.size_vocab, loader.size_vocab, args.size_mem,
        args.size_batch, args.n_layers, args.n_unroll)

    if args.profile:
        profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),)))

    def initialize_hiddens(n):
        return [np.zeros((n, args.size_mem), cgt.floatX)
                for _ in xrange(get_num_hiddens(args.arch, args.n_layers))]

    if args.grad_check:
        x, y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10)
        result = f_loss_and_grad(x, y, *prev_hiddens)
        g_anal = result[1]
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch", iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x, y) in loader.train_batches_iter():
            out = f_loss_and_grad(x, y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest:
                return
        print "%.3f s/batch. avg loss = %.3f" % ((time() - tstart) / len(losses), np.mean(losses))
        optim_state.step_size *= .98  # pylint: disable=E1101

        sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind,
               n_steps=300, temp=1.0, seed_text="")

    if args.profile:
        profiler.print_stats()
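# Sketch (an assumption about the helper): get_num_hiddens, used by
# initialize_hiddens above, presumably returns how many recurrent state vectors the
# unrolled network expects -- one per layer for a GRU, two per layer (hidden state
# and cell state) for an LSTM.
def _get_num_hiddens_sketch(arch, n_layers):
    return {"gru": n_layers, "lstm": 2 * n_layers}[arch]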
class AtariRAMPolicy(PPOPolicy, Serializable):
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        h1 = cgt.tanh(nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(nn.Affine(nhid, n_actions, weight_init=nn.IIDGaussian(std=0.01))(h1))

        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)

    def step(self, X):
        pdist_na = self.f_probs(X)
        acts_n = cat_sample(pdist_na)
        return {"action": acts_n, "pdist": pdist_na}

    def compute_gradient(self, pdist_np, o_no, a_n, q_n):
        return self.f_gradlogp(pdist_np, o_no, a_n, q_n)

    def compute_surr_kl(self, pdist_np, o_no, a_n, q_n):
        return self.f_surr_kl(pdist_np, o_no, a_n, q_n)

    def compute_grad_lagrangian(self, lam, pdist_np, o_no, a_n, q_n):
        return self._f_grad_lagrangian(lam, pdist_np, o_no, a_n, q_n)

    def compute_entropy(self, pdist_np):
        return cat_entropy(pdist_np)

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self, th):
        return self.pc.set_value_flat(th)
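# The kl expression above is the categorical KL divergence
# KL(p_old || p_new) = sum_a p_old(a) * log(p_old(a) / p_new(a)), averaged over the
# batch. A small numpy check of the same quantity (illustrative only):
def _cat_kl_sketch(oldp_na, p_na):
    import numpy as np
    return (oldp_na * np.log(oldp_na / p_na)).sum(axis=1).mean()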
def main():
    nr.seed(0)
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int, default=64)
    parser.add_argument("--size_batch", type=int, default=64)
    parser.add_argument("--n_layers", type=int, default=2)
    parser.add_argument("--n_unroll", type=int, default=16)
    parser.add_argument("--step_size", type=float, default=.01)
    parser.add_argument("--decay_rate", type=float, default=0.95)
    parser.add_argument("--n_epochs", type=int, default=20)
    parser.add_argument("--arch", choices=["lstm", "gru"], default="lstm")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--temperature", type=float, default=1)
    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")
    assert args.n_unroll > 1

    loader = Loader(args.data_dir, args.size_batch, args.n_unroll, (1.0, 0, 0))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(
        args.arch, loader.size_vocab, loader.size_vocab, args.size_mem,
        args.size_batch, args.n_layers, args.n_unroll)

    if args.profile:
        profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),)))

    def initialize_hiddens(n):
        return [np.zeros((n, args.size_mem), cgt.floatX)
                for _ in xrange(get_num_hiddens(args.arch, args.n_layers))]

    if args.grad_check:
        x, y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10)
        result = f_loss_and_grad(x, y, *prev_hiddens)
        g_anal = result[1]
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch", iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x, y) in loader.train_batches_iter():
            out = f_loss_and_grad(x, y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest:
                return
        print "%.3f s/batch. avg loss = %.3f" % ((time() - tstart) / len(losses), np.mean(losses))
        optim_state.step_size *= .98  # pylint: disable=E1101

        sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind,
               n_steps=1000, temperature=args.temperature, seed_text="")

    if args.profile:
        profiler.print_stats()
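# Sketch (an assumption about how sample() consumes --temperature): dividing the
# log-probabilities by the temperature before renormalizing sharpens (T < 1) or
# flattens (T > 1) the next-character distribution.
def _apply_temperature_sketch(logprobs_nk, temperature):
    import numpy as np
    scaled = logprobs_nk / temperature
    scaled -= scaled.max(axis=1, keepdims=True)  # subtract row max for numerical stability
    p = np.exp(scaled)
    return p / p.sum(axis=1, keepdims=True)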