def test_plant():
    """Example of a network using a dynamic plant as the output layer.

    Builds a two-link arm plant, uses it as the final layer of a recurrent
    network and trains that network with the HessianFree optimizer.  After
    every batch the weights are saved with ``np.savez_compressed`` under
    ``weights/rnn_weights-trial<trial>-err<error>``.  On start-up the saved
    checkpoint with the highest trial number (if any) is loaded and
    training continues from the following trial; otherwise training starts
    fresh with a newly drawn random seed.

    Every ``print`` call passes a single pre-formatted string so the output
    is the same under the Python 2 print statement and the Python 3 print
    function.

    Returns:
        The value of ``rnn.best_error`` when training stops.
    """
    import glob
    import os
    import re
    import sys

    eps = 1e-6  # value to use for finite differences computations
    dt = 1e-2  # size of time step
    sig_len = 40  # how many time steps to train over
    batch_size = 32  # how many updates to perform with static input
    num_batches = 20000  # how many batches to run total

    # NOTE: Change to wherever you keep your arm models
    sys.path.append("../../../studywolf_control/studywolf_control/")
    from arms.two_link.arm_python import Arm as Arm
    print('Plant is:  %s' % (Arm,))
    arm = Arm(dt=dt, init_q=[0.736134824578, 1.85227640003])

    num_states = arm.DOF * 2  # our states are [positions, velocities]
    targets = gen_targets(arm=arm, sig_len=sig_len)  # target joint angles
    init_state = np.zeros((len(targets), num_states))  # initial velocity = 0
    init_state[:, :arm.DOF] = arm.init_q  # set up the initial joint angles
    plant = PlantArm(arm=arm, targets=targets,
                     init_state=init_state, eps=eps)

    # open up the weights folder and check for saved weights.  Checkpoints
    # are ranked by their parsed trial number, not by filename: the trial
    # is written with %04i, so once it reaches 10000 a plain string sort
    # would put 'trial10000' before 'trial9999' and resume from the wrong
    # file.  Files that do not follow the checkpoint naming are ignored.
    checkpoints = []
    for path in glob.glob('weights/rnn*'):
        match = re.search(r'rnn_weights-trial(\d+)-err', path)
        if match:
            checkpoints.append((int(match.group(1)), path))

    if checkpoints:
        # if weights found, load them up and keep going from last trial
        last_trial, latest = max(checkpoints)
        W = np.load(latest)['arr_0']
        print('loading from  %s' % latest)
        print('last_trial:  %s' % last_trial)
    else:
        # if no weights found, start fresh with new random seed
        W = None
        last_trial = -1
        seed = np.random.randint(100000000)
        print('seed :  %s' % seed)
        np.random.seed(seed)

    # specify the network structure and loss functions
    from hessianfree.loss_funcs import SquaredError
    rnn = RNNet(
        # specify the number of nodes in each layer
        shape=[num_states * 2, 32, 32, num_states, num_states],
        # specify the function of the nodes in each layer
        layers=[Linear(), Tanh(), Tanh(), Linear(), plant],
        # specify the layers that have recurrent connections
        rec_layers=[1, 2],
        # specify the connections between layers
        conns={0: [1, 2], 1: [2], 2: [3], 3: [4]},
        # specify the loss function
        loss_type=[
            # squared error between plant output and targets
            SquaredError()],
        load_weights=W,
        use_GPU=False)

    # set up masking so that weights between network output
    # and the plant aren't modified in learning, always = 1
    offset, W_end, b_end = rnn.offsets[(3, 4)]
    rnn.mask = np.zeros(rnn.W.shape, dtype=bool)
    rnn.mask[offset:b_end] = True
    # layers 3 and 4 both have num_states nodes, so the identity matrix is
    # sized from num_states instead of a hard-coded 4
    rnn.W[offset:W_end] = np.eye(num_states).flatten()

    # make sure the checkpoint folder exists before any training time is
    # spent; otherwise the first save on a fresh run would fail
    if not os.path.isdir('weights'):
        os.makedirs('weights')

    for ii in range(last_trial + 1, num_batches):
        print('=============================================')
        print('training batch  %s' % ii)
        rnn.run_batches(plant, None, max_epochs=batch_size,
                        optimizer=HessianFree(CG_iter=96,
                                              init_damping=100))
        # save the weights to file, track trial and error
        err = rnn.best_error
        name = 'weights/rnn_weights-trial%04i-err%.5f' % (ii, err)
        np.savez_compressed(name, rnn.W)
        print('=============================================')
        print('network:  %s' % name)
        print('final error:  %s' % err)
        print('=============================================')

    return rnn.best_error
def test_plant():
    """Example of a network using a dynamic plant as the output layer.

    Builds a two-link arm plant, uses it as the final layer of a recurrent
    network and trains that network with the HessianFree optimizer.  After
    every batch the weights are saved with ``np.savez_compressed`` under
    ``weights/rnn_weights-trial<trial>-err<error>``.  On start-up the saved
    checkpoint with the highest trial number (if any) is loaded and
    training continues from the following trial; otherwise training starts
    fresh with a newly drawn random seed.

    Every ``print`` call passes a single pre-formatted string so the output
    is the same under the Python 2 print statement and the Python 3 print
    function.

    Returns:
        The value of ``rnn.best_error`` when training stops.
    """
    import glob
    import os
    import re
    import sys

    eps = 1e-6  # value to use for finite differences computations
    dt = 1e-2  # size of time step
    sig_len = 40  # how many time steps to train over
    batch_size = 32  # how many updates to perform with static input
    num_batches = 20000  # how many batches to run total

    # NOTE: Change to wherever you keep your arm models
    sys.path.append("../../../studywolf_control/studywolf_control/")
    from arms.two_link.arm_python import Arm as Arm
    print('Plant is:  %s' % (Arm,))
    arm = Arm(dt=dt, init_q=[0.736134824578, 1.85227640003])

    num_states = arm.DOF * 2  # our states are [positions, velocities]
    targets = gen_targets(arm=arm, sig_len=sig_len)  # target joint angles
    init_state = np.zeros((len(targets), num_states))  # initial velocity = 0
    init_state[:, :arm.DOF] = arm.init_q  # set up the initial joint angles
    plant = PlantArm(arm=arm, targets=targets,
                     init_state=init_state, eps=eps)

    # open up the weights folder and check for saved weights.  Checkpoints
    # are ranked by their parsed trial number, not by filename: the trial
    # is written with %04i, so once it reaches 10000 a plain string sort
    # would put 'trial10000' before 'trial9999' and resume from the wrong
    # file.  Files that do not follow the checkpoint naming are ignored.
    checkpoints = []
    for path in glob.glob('weights/rnn*'):
        match = re.search(r'rnn_weights-trial(\d+)-err', path)
        if match:
            checkpoints.append((int(match.group(1)), path))

    if checkpoints:
        # if weights found, load them up and keep going from last trial
        last_trial, latest = max(checkpoints)
        W = np.load(latest)['arr_0']
        print('loading from  %s' % latest)
        print('last_trial:  %s' % last_trial)
    else:
        # if no weights found, start fresh with new random seed
        W = None
        last_trial = -1
        seed = np.random.randint(100000000)
        print('seed :  %s' % seed)
        np.random.seed(seed)

    # specify the network structure and loss functions
    from hessianfree.loss_funcs import SquaredError
    rnn = RNNet(
        # specify the number of nodes in each layer
        shape=[num_states * 2, 32, 32, num_states, num_states],
        # specify the function of the nodes in each layer
        layers=[Linear(), Tanh(), Tanh(), Linear(), plant],
        # specify the layers that have recurrent connections
        rec_layers=[1, 2],
        # specify the connections between layers
        conns={0: [1, 2], 1: [2], 2: [3], 3: [4]},
        # specify the loss function
        loss_type=[
            # squared error between plant output and targets
            SquaredError()],
        load_weights=W,
        use_GPU=False)

    # set up masking so that weights between network output
    # and the plant aren't modified in learning, always = 1
    offset, W_end, b_end = rnn.offsets[(3, 4)]
    rnn.mask = np.zeros(rnn.W.shape, dtype=bool)
    rnn.mask[offset:b_end] = True
    # layers 3 and 4 both have num_states nodes, so the identity matrix is
    # sized from num_states instead of a hard-coded 4
    rnn.W[offset:W_end] = np.eye(num_states).flatten()

    # make sure the checkpoint folder exists before any training time is
    # spent; otherwise the first save on a fresh run would fail
    if not os.path.isdir('weights'):
        os.makedirs('weights')

    for ii in range(last_trial + 1, num_batches):
        print('=============================================')
        print('training batch  %s' % ii)
        rnn.run_batches(plant, None, max_epochs=batch_size,
                        optimizer=HessianFree(CG_iter=96,
                                              init_damping=100))
        # save the weights to file, track trial and error
        err = rnn.best_error
        name = 'weights/rnn_weights-trial%04i-err%.5f' % (ii, err)
        np.savez_compressed(name, rnn.W)
        print('=============================================')
        print('network:  %s' % name)
        print('final error:  %s' % err)
        print('=============================================')

    return rnn.best_error