示例#1
0
def train(args, X, Y, dbg_iter=None, dbg_epoch=None, dbg_done=None):
    dbg_out = []
    net_in, net_out = hybrid_network(args.num_inputs,
                                     args.num_outputs,
                                     args.num_units,
                                     args.num_sto,
                                     dbg_out=dbg_out)
    params, f_step, f_loss, f_grad, f_surr = \
        make_funcs(net_in, net_out, args, dbg_out=dbg_out)
    param_col = ParamCollection(params)
    init_params = nn.init_array(args.init_conf,
                                (param_col.get_total_size(), 1))
    param_col.set_value_flat(init_params.flatten())
    init_params = [
        np.array([[0., 1.]]),  # W_1
        np.array([[0., 0.]]),  # b_1
        np.array([[1.], [1.]]),  # W_3
        np.array([[0.]]),  # b_3
    ]
    param_col.set_values(init_params)
    if 'snapshot' in args:
        print "Loading params from previous snapshot"
        snapshot = pickle.load(open(args['snapshot'], 'r'))
        param_col.set_values(snapshot)
    # param_col.set_value_flat(
    #     np.random.normal(0., 1.,size=param_col.get_total_size())
    # )
    # optim_state = Table(theta=param_col.get_value_flat(),
    #                     scratch=param_col.get_value_flat(),
    #                     step_size=args.step_size
    #                     )

    optim_state = make_rmsprop_state(theta=param_col.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)
    for i_epoch in range(args.n_epochs):
        for i_iter in range(X.shape[0]):
            ind = np.random.choice(X.shape[0], args['size_batch'])
            x, y = X[ind], Y[ind]  # not sure this works for multi-dim
            info = f_surr(x, y, num_samples=args['size_sample'])
            loss, loss_surr, grad = info['loss'], info['surr_loss'], info[
                'surr_grad']
            # loss, loss_surr, grad = f_grad(x, y)
            # update
            rmsprop_update(param_col.flatten_values(grad), optim_state)
            # optim_state.scratch = param_col.flatten_values(grad)
            # optim_state.theta -= optim_state.step_size * optim_state.scratch
            param_col.set_value_flat(optim_state.theta)
            print param_col.get_value_flat()
            if dbg_iter:
                dbg_iter(i_epoch, i_iter, param_col, optim_state, info)
        if dbg_epoch: dbg_epoch(i_epoch, param_col, f_surr)
    if dbg_done: dbg_done(param_col, optim_state, f_surr)
    return optim_state
示例#2
0
文件: demo_sfnn.py 项目: TZ2016/cgt
def train(args, X, Y, dbg_iter=None, dbg_epoch=None, dbg_done=None):
    dbg_out = []
    net_in, net_out = hybrid_network(args.num_inputs, args.num_outputs,
                                     args.num_units, args.num_sto,
                                     dbg_out=dbg_out)
    params, f_step, f_loss, f_grad, f_surr = \
        make_funcs(net_in, net_out, args, dbg_out=dbg_out)
    param_col = ParamCollection(params)
    init_params = nn.init_array(args.init_conf, (param_col.get_total_size(), 1))
    param_col.set_value_flat(init_params.flatten())
    init_params = [
        np.array([[0., 1.]]),  # W_1
        np.array([[0., 0.]]),  # b_1
        np.array([[1.], [1.]]),  # W_3
        np.array([[0.]]),  # b_3
    ]
    param_col.set_values(init_params)
    if 'snapshot' in args:
        print "Loading params from previous snapshot"
        snapshot = pickle.load(open(args['snapshot'], 'r'))
        param_col.set_values(snapshot)
    # param_col.set_value_flat(
    #     np.random.normal(0., 1.,size=param_col.get_total_size())
    # )
    # optim_state = Table(theta=param_col.get_value_flat(),
    #                     scratch=param_col.get_value_flat(),
    #                     step_size=args.step_size
    #                     )

    optim_state = make_rmsprop_state(theta=param_col.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)
    for i_epoch in range(args.n_epochs):
        for i_iter in range(X.shape[0]):
            ind = np.random.choice(X.shape[0], args['size_batch'])
            x, y = X[ind], Y[ind]  # not sure this works for multi-dim
            info = f_surr(x, y, num_samples=args['size_sample'])
            loss, loss_surr, grad = info['loss'], info['surr_loss'], info['surr_grad']
            # loss, loss_surr, grad = f_grad(x, y)
            # update
            rmsprop_update(param_col.flatten_values(grad), optim_state)
            # optim_state.scratch = param_col.flatten_values(grad)
            # optim_state.theta -= optim_state.step_size * optim_state.scratch
            param_col.set_value_flat(optim_state.theta)
            print param_col.get_value_flat()
            if dbg_iter: dbg_iter(i_epoch, i_iter, param_col, optim_state, info)
        if dbg_epoch: dbg_epoch(i_epoch, param_col, f_surr)
    if dbg_done: dbg_done(param_col, optim_state, f_surr)
    return optim_state
示例#3
0
class MujocoPolicy(PPOPolicy, Serializable):
    def __init__(self, obs_dim, ctrl_dim):

        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no",fixed_shape=(None,obs_dim))
        a_na = cgt.matrix("a_na",fixed_shape = (None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2*ctrl_dim))
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(nn.Affine(obs_dim,nhid,weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(nn.Affine(nhid,nhid,weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid,ctrl_dim,weight_init=nn.IIDGaussian(std=0.01))(h2)

        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2*self.ctrl_dim]

        logp_n = ((-.5) * cgt.square( (a_na - mean_na) / std_na ).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square( (a_na - oldmean_na) / oldstd_na ).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n*adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
        # kl = cgt.log(sigafter/)

        params = nn.get_parameters(surr)

        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()


        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
        self._compute_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_na, adv_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])

        self.pc = ParamCollection(params)

    def compute_surr_kl(self, *args):
        return self._compute_surr_kl(*args)

    def compute_grad_lagrangian(self, *args):
        return self._compute_grad_lagrangian(*args)

    def get_stdev(self):
        return np.exp(self.logstd.op.get_value().ravel())

    def step(self, X):
        pdist_na = self.f_pdist(X)
        acts_n = meanstd_sample(pdist_na)
        return {
            "action" : acts_n,
            "pdist" : pdist_na
        }

    def compute_entropy(self, pdist_np):
        return meanstd_entropy(pdist_np)

    def pdist_ndim(self):
        return 2*self.ctrl_dim

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self,th):
        return self.pc.set_value_flat(th)
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--n_batches", type=int, default=1000000)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    args = parser.parse_args()
    np.seterr("raise")

    cgt.set_precision("quad" if args.grad_check else "double")
    np.random.seed(0)

    # model parameters
    if args.grad_check:
        opt = NTMOpts(
            b=1,  # batch size
            h=1,  # number of heads
            n=2,  # number of memory sites
            m=3,  # dimension at each memory site
            k=4,  # dimension of input
            p=2,  # dimension of output
            ff_hid_sizes=[])
        seq_length = 2

    else:
        opt = NTMOpts(
            b=64,  # batch size
            h=3,  # number of heads
            n=128,  # number of memory sites
            m=20,  # dimension at each memory site
            k=3,  # dimension of input
            p=1,  # dimension of output
            ff_hid_sizes=[128, 128])

        seq_length = 10

    if args.unittest:
        seq_length = 3
        args.n_batches = 3

    tstart = time.time()
    ntm = make_ntm(opt)
    task = CopyTask(opt.b, seq_length, opt.p)
    f_loss, f_loss_and_grad, params = make_funcs(opt, ntm, task.total_time(),
                                                 task.loss_timesteps())
    print "graph construction and compilation took %g seconds" % (time.time() -
                                                                  tstart)

    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(), )))

    if args.grad_check:
        x, y = task.gen_batch()

        def f(thnew):
            thold = th.copy()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, th, eps=1e-8)
        _, _, g_anal = f_loss_and_grad(x, y)
        assert np.allclose(g_num, g_anal, atol=1e-8)
        print "Gradient check succeeded!"
        print "%i/%i elts of grad are nonzero" % (
            (g_anal != 0).sum(), g_anal.size)
        return

    seq_num = 0
    state = make_rmsprop_state(pc.get_value_flat(), .01, .95)
    print fmt_row(13, ["seq num", "CE (bits)", "accuracy", "|g|_inf"],
                  header=True)

    if args.profile: cgt.profiler.start()

    for i in xrange(args.n_batches):
        x, y = task.gen_batch()
        seq_num += x.shape[1]
        l, l01, g = f_loss_and_grad(x, y)
        print fmt_row(13, [seq_num, l, l01, np.abs(g).max()])
        rmsprop_update(g, state)
        pc.set_value_flat(state.theta)
        if not np.isfinite(l): break

    if args.profile: cgt.profiler.print_stats()
示例#5
0
class AtariRAMPolicy(PPOPolicy, Serializable):
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no",fixed_shape=(None,n_in))
        a_n = cgt.vector("a_n",dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        nhid, nhid2 = 64, 64
        h0 = (o_no - 128.0)/128.0
        d0 = nn.dropout(h1, .2)

        h1 = nn.rectify(nn.Affine(128,nhid,weight_init=nn.IIDGaussian(std=.1))(d0))
        d1 = nn.dropout(h1, .2)
        h2 = nn.rectify(nn.Affine(nhid,nhid2,weight_init=nn.IIDGaussian(std=.1))(d1))
        # d2 = nn.dropout(h2, .2)
        probs_na = nn.softmax(nn.Affine(nhid2,n_actions,weight_init=nn.IIDGaussian(std=0.01))(d2))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n*q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np/probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_n, q_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)

    def step(self, X):
        pdist_na = self.f_probs(X)
        acts_n = cat_sample(pdist_na)
        return {
            "action" : acts_n,
            "pdist" : pdist_na
        }

    def compute_gradient(self, pdist_np, o_no, a_n, q_n):
        return self.f_gradlogp(pdist_np, o_no, a_n, q_n)

    def compute_surr_kl(self, pdist_np, o_no, a_n, q_n):
        return self.f_surr_kl(pdist_np, o_no, a_n, q_n)

    def compute_grad_lagrangian(self, lam, pdist_np, o_no, a_n, q_n):
        return self._f_grad_lagrangian(lam, pdist_np, o_no, a_n, q_n)

    def compute_entropy(self, pdist_np):
        return cat_entropy(pdist_np)

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self,th):
        return self.pc.set_value_flat(th)
示例#6
0
文件: rrnn.py 项目: zobot/rrnn
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int,default=64)
    parser.add_argument("--size_batch", type=int,default=64)
    parser.add_argument("--n_layers",type=int,default=2)
    parser.add_argument("--n_unroll",type=int,default=16)
    parser.add_argument("--k_in",type=int,default=3)
    parser.add_argument("--k_h",type=int,default=5)
    parser.add_argument("--step_size",type=float,default=.01)
    parser.add_argument("--decay_rate",type=float,default=0.95)
    parser.add_argument("--n_epochs",type=int,default=20)
    parser.add_argument("--arch",choices=["lstm","gru"],default="gru")
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir,args.size_batch, args.n_unroll, (.8,.1,.1))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(args.arch, loader.size_vocab, 
        loader.size_vocab, args.size_mem, args.size_batch, args.n_layers, args.n_unroll, args.k_in, args.k_h)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-0.01, 0.01, size=(pc.get_total_size(),)))

    for i, param in enumerate(pc.params):
        if "is_rotation" in param.props:
            shape = pc.get_shapes()[i]
            num_vec = int(shape[0] / 2)
            size_vec = int(shape[1])
            gauss = nr.normal(size=(num_vec * size_vec))
            gauss = np.reshape(gauss, (num_vec, size_vec))
            gauss_mag = norm(gauss, axis=1, keepdims=True)
            gauss_normed = gauss / gauss_mag
            gauss_perturb = nr.normal(scale=0.01, size=(num_vec * size_vec))
            gauss_perturb = np.reshape(gauss_perturb, (num_vec, size_vec))
            second_vec = gauss_normed + gauss_perturb
            second_vec_mag = norm(second_vec, axis=1, keepdims=True)
            second_vec_normed = second_vec / second_vec_mag
            new_param_value = np.zeros(shape)
            for j in xrange(num_vec):
                new_param_value[2 * j, :] = gauss_normed[j, :]
                new_param_value[2 * j + 1, :] = second_vec_normed[j, :]
            param.op.set_value(new_param_value)
            #print new_param_value



    def initialize_hiddens(n):
        return [np.ones((n, args.size_mem), cgt.floatX) / float(args.size_mem) for _ in xrange(get_num_hiddens(args.arch, args.n_layers))]

    if args.grad_check:
    #if True:
        x,y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)
        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x,y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss
        from cgt.numeric_diff import numeric_grad
        print "Beginning grad check"
        g_num = numeric_grad(f, pc.get_value_flat(),eps=1e-10)
        print "Ending grad check"
        result = f_loss_and_grad(x,y,*prev_hiddens)
        g_anal = result[1]
        diff = g_num - g_anal
        abs_diff = np.abs(diff)
        print np.where(abs_diff > 1e-4)
        print diff[np.where(abs_diff > 1e-4)]
        embed()
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(), step_size = args.step_size, 
        decay_rate = args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch",iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x,y) in loader.train_batches_iter():
            out = f_loss_and_grad(x,y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f"%((time()-tstart)/len(losses), np.mean(losses))
        optim_state.step_size *= .98 #pylint: disable=E1101

        sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind, n_steps=300, temp=1.0, seed_text = "")

    if args.profile: profiler.print_stats()
示例#7
0
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--n_batches",type=int,default=1000000)
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--task",choices=["copy","reverse_copy","repeat_copy"],default="copy")
    args = parser.parse_args()
    np.seterr("raise")

    cgt.set_precision("quad" if args.grad_check else "double")
    np.random.seed(0)

    # model parameters
    if args.grad_check:
        opt = NTMOpts(
            b = 1, # batch size
            h = 1, # number of heads
            n = 2, # number of memory sites
            m = 3, # dimension at each memory site
            k = 4, # dimension of input
            p = 2, # dimension of output
            ff_hid_sizes = []
        )
        seq_length = 2

    else:
        opt = NTMOpts(
            b = 64, # batch size
            h = 3, # number of heads
            n = 128, # number of memory sites
            m = 20, # dimension at each memory site
            k = 3, # dimension of input
            p = 1, # dimension of output
            ff_hid_sizes = [128,128]
        )

        seq_length = 10


    if args.unittest:
        seq_length=3
        args.n_batches=3
        


    tstart = time.time()
    ntm = make_ntm(opt)
    if args.task == "copy":
        task = CopyTask(opt.b, seq_length, opt.p)
    elif args.task == "reverse_copy":
        task = ReverseCopyTask(opt.b, seq_length, opt.p)
    elif args.task == "repeat_copy":
        n_copies = 4
        task = RepeatCopyTask(opt.b, seq_length, opt.p, n_copies)


    f_loss, f_loss_and_grad, params = make_funcs(opt, ntm, task.total_time(), task.loss_timesteps())
    print "graph construction and compilation took %g seconds"%(time.time()-tstart)

    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),)))

    if args.grad_check:
        x,y = task.gen_batch()
        def f(thnew):
            thold = th.copy()
            pc.set_value_flat(thnew)
            loss = f_loss(x,y)
            pc.set_value_flat(thold)
            return loss
        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, th,eps=1e-8)
        _, _, g_anal = f_loss_and_grad(x,y)
        assert np.allclose(g_num, g_anal, atol=1e-8)
        print "Gradient check succeeded!"
        print "%i/%i elts of grad are nonzero"%( (g_anal != 0).sum(), g_anal.size )
        return


    seq_num = 0
    state = make_rmsprop_state(pc.get_value_flat(), .01, .95)
    print fmt_row(13, ["seq num", "CE (bits)", "accuracy", "|g|_inf"], header=True)
    
    if args.profile: cgt.profiler.start()
    
    for i in xrange(args.n_batches):
        x,y = task.gen_batch()
        seq_num += x.shape[1]
        l,l01,g = f_loss_and_grad(x,y)
        print fmt_row(13, [seq_num, l,l01,np.abs(g).max()])
        rmsprop_update(g, state)        
        pc.set_value_flat(state.theta)
        if not np.isfinite(l): break

    
    if args.profile: cgt.profiler.print_stats()
示例#8
0
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int, default=64)
    parser.add_argument("--size_batch", type=int, default=64)
    parser.add_argument("--n_layers", type=int, default=2)
    parser.add_argument("--n_unroll", type=int, default=16)
    parser.add_argument("--step_size", type=float, default=.01)
    parser.add_argument("--decay_rate", type=float, default=0.95)
    parser.add_argument("--n_epochs", type=int, default=20)
    parser.add_argument("--arch", choices=["lstm", "gru"], default="lstm")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir, args.size_batch, args.n_unroll,
                    (.8, .1, .1))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(
        args.arch, loader.size_vocab, loader.size_vocab, args.size_mem,
        args.size_batch, args.n_layers, args.n_unroll)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(), )))

    def initialize_hiddens(n):
        return [
            np.zeros((n, args.size_mem), cgt.floatX)
            for _ in xrange(get_num_hiddens(args.arch, args.n_layers))
        ]

    if args.grad_check:
        x, y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10)
        result = f_loss_and_grad(x, y, *prev_hiddens)
        g_anal = result[1]
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch", iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x, y) in loader.train_batches_iter():
            out = f_loss_and_grad(x, y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f" % (
            (time() - tstart) / len(losses), np.mean(losses))
        optim_state.step_size *= .98  #pylint: disable=E1101

        sample(f_step,
               initialize_hiddens(1),
               char2ind=loader.char2ind,
               n_steps=300,
               temp=1.0,
               seed_text="")

    if args.profile: profiler.print_stats()
示例#9
0
class AtariRAMPolicy(PPOPolicy, Serializable):
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        h1 = cgt.tanh(
            nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(
            nn.Affine(nhid, n_actions,
                      weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n],
                                      [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)

    def step(self, X):
        pdist_na = self.f_probs(X)
        acts_n = cat_sample(pdist_na)
        return {"action": acts_n, "pdist": pdist_na}

    def compute_gradient(self, pdist_np, o_no, a_n, q_n):
        return self.f_gradlogp(pdist_np, o_no, a_n, q_n)

    def compute_surr_kl(self, pdist_np, o_no, a_n, q_n):
        return self.f_surr_kl(pdist_np, o_no, a_n, q_n)

    def compute_grad_lagrangian(self, lam, pdist_np, o_no, a_n, q_n):
        return self._f_grad_lagrangian(lam, pdist_np, o_no, a_n, q_n)

    def compute_entropy(self, pdist_np):
        return cat_entropy(pdist_np)

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self, th):
        return self.pc.set_value_flat(th)
示例#10
0
文件: rrnn.py 项目: zoemcc/rrnn
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int, default=64)
    parser.add_argument("--size_batch", type=int, default=64)
    parser.add_argument("--n_layers", type=int, default=2)
    parser.add_argument("--n_unroll", type=int, default=16)
    parser.add_argument("--k_in", type=int, default=3)
    parser.add_argument("--k_h", type=int, default=5)
    parser.add_argument("--step_size", type=float, default=.01)
    parser.add_argument("--decay_rate", type=float, default=0.95)
    parser.add_argument("--n_epochs", type=int, default=20)
    parser.add_argument("--arch", choices=["lstm", "gru"], default="gru")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir, args.size_batch, args.n_unroll,
                    (.8, .1, .1))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(
        args.arch, loader.size_vocab, loader.size_vocab, args.size_mem,
        args.size_batch, args.n_layers, args.n_unroll, args.k_in, args.k_h)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-0.01, 0.01, size=(pc.get_total_size(), )))

    for i, param in enumerate(pc.params):
        if "is_rotation" in param.props:
            shape = pc.get_shapes()[i]
            num_vec = int(shape[0] / 2)
            size_vec = int(shape[1])
            gauss = nr.normal(size=(num_vec * size_vec))
            gauss = np.reshape(gauss, (num_vec, size_vec))
            gauss_mag = norm(gauss, axis=1, keepdims=True)
            gauss_normed = gauss / gauss_mag
            gauss_perturb = nr.normal(scale=0.01, size=(num_vec * size_vec))
            gauss_perturb = np.reshape(gauss_perturb, (num_vec, size_vec))
            second_vec = gauss_normed + gauss_perturb
            second_vec_mag = norm(second_vec, axis=1, keepdims=True)
            second_vec_normed = second_vec / second_vec_mag
            new_param_value = np.zeros(shape)
            for j in xrange(num_vec):
                new_param_value[2 * j, :] = gauss_normed[j, :]
                new_param_value[2 * j + 1, :] = second_vec_normed[j, :]
            param.op.set_value(new_param_value)
            #print new_param_value

    def initialize_hiddens(n):
        return [
            np.ones((n, args.size_mem), cgt.floatX) / float(args.size_mem)
            for _ in xrange(get_num_hiddens(args.arch, args.n_layers))
        ]

    if args.grad_check:
        #if True:
        x, y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        print "Beginning grad check"
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10)
        print "Ending grad check"
        result = f_loss_and_grad(x, y, *prev_hiddens)
        g_anal = result[1]
        diff = g_num - g_anal
        abs_diff = np.abs(diff)
        print np.where(abs_diff > 1e-4)
        print diff[np.where(abs_diff > 1e-4)]
        embed()
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch", iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x, y) in loader.train_batches_iter():
            out = f_loss_and_grad(x, y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f" % (
            (time() - tstart) / len(losses), np.mean(losses))
        optim_state.step_size *= .98  #pylint: disable=E1101

        sample(f_step,
               initialize_hiddens(1),
               char2ind=loader.char2ind,
               n_steps=300,
               temp=1.0,
               seed_text="")

    if args.profile: profiler.print_stats()
示例#11
0
class MujocoPolicy(PPOPolicy, Serializable):
    def __init__(self, obs_dim, ctrl_dim):

        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
        a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)),
                                               name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(
            nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(
            nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid,
                            ctrl_dim,
                            weight_init=nn.IIDGaussian(std=0.01))(h2)

        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]

        logp_n = ((-.5) * cgt.square(
            (a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square(
            (a_na - oldmean_na) / oldstd_na).sum(axis=1)
                     ) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n * adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
        # kl = cgt.log(sigafter/)

        params = nn.get_parameters(surr)

        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) +
              (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) -
              .5).sum(axis=1).mean()

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                                             [surr, kl])
        self._compute_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_na, adv_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                                   [surr, kl])

        self.pc = ParamCollection(params)

    def compute_surr_kl(self, *args):
        return self._compute_surr_kl(*args)

    def compute_grad_lagrangian(self, *args):
        return self._compute_grad_lagrangian(*args)

    def get_stdev(self):
        return np.exp(self.logstd.op.get_value().ravel())

    def step(self, X):
        pdist_na = self.f_pdist(X)
        acts_n = meanstd_sample(pdist_na)
        return {"action": acts_n, "pdist": pdist_na}

    def compute_entropy(self, pdist_np):
        return meanstd_entropy(pdist_np)

    def pdist_ndim(self):
        return 2 * self.ctrl_dim

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self, th):
        return self.pc.set_value_flat(th)
示例#12
0
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int,default=64)
    parser.add_argument("--size_batch", type=int,default=64)
    parser.add_argument("--n_layers",type=int,default=2)
    parser.add_argument("--n_unroll",type=int,default=16)
    parser.add_argument("--step_size",type=float,default=.01)
    parser.add_argument("--decay_rate",type=float,default=0.95)
    parser.add_argument("--n_epochs",type=int,default=20)
    parser.add_argument("--arch",choices=["lstm","gru"],default="lstm")
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--temperature",type=float,default=1)

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir,args.size_batch, args.n_unroll, (1.0,0,0))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(args.arch, loader.size_vocab, 
        loader.size_vocab, args.size_mem, args.size_batch, args.n_layers, args.n_unroll)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),)))

    def initialize_hiddens(n):
        return [np.zeros((n, args.size_mem), cgt.floatX) for _ in xrange(get_num_hiddens(args.arch, args.n_layers))]

    if args.grad_check:
        x,y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)
        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x,y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss
        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, pc.get_value_flat(),eps=1e-10)
        result = f_loss_and_grad(x,y,*prev_hiddens)
        g_anal = result[1]
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(), step_size = args.step_size, 
        decay_rate = args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch",iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x,y) in loader.train_batches_iter():
            out = f_loss_and_grad(x,y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f"%((time()-tstart)/len(losses), np.mean(losses))
        optim_state.step_size *= .98 #pylint: disable=E1101

        sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind, n_steps=1000, temperature=args.temperature, seed_text = "")

    if args.profile: profiler.print_stats()