from environment import physic_env
from config import *
import numpy as np


def generate_test():
    mode = 1
    T = 500
    new_env = physic_env(train_cond, mass_list, force_list, init_mouse,
                         T, mode, prior, reward_stop)
    # s_next, r, is_done, r_others = new_env.step(a)

    # Control vector of zeros for all T frames.
    control_vec = {'obj': np.repeat(0, T),
                   'x': np.repeat(0, T),
                   'y': np.repeat(0, T)}

    current_cond = new_env.update_condition(new_env.cond['mass'],
                                            new_env.cond['lf'])
    new_env.update_bodies(current_cond)
    local_data = new_env.initial_data()

    # Simulate T time steps under the current condition.
    for t in range(T):
        local_trajectory = new_env.update_simulate_bodies(
            current_cond, control_vec, t, local_data)

    # Flatten the per-object traces into one row per frame:
    # six leading placeholder zeros followed by x, y, vx, vy
    # for each of the four objects.
    trajectory = []
    for f in range(T):
        row = [0, 0, 0, 0, 0, 0]
        for obj in ('o1', 'o2', 'o3', 'o4'):
            row.extend([local_trajectory[obj]['x'][f],
                        local_trajectory[obj]['y'][f],
                        local_trajectory[obj]['vx'][f],
                        local_trajectory[obj]['vy'][f]])
        trajectory.append(row)
    return trajectory
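# Hypothetical usage sketch (not part of the original file): run one test
# rollout and persist it for inspection. Reuses the imports above; the
# output filename is illustrative only.
if __name__ == "__main__":
    traj = np.array(generate_test())      # shape (T, 22): 6 zeros + 4 objects x (x, y, vx, vy)
    print(traj.shape)
    np.save('test_trajectory.npy', traj)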
from environment import physic_env
import numpy as np
from config import *
import math
import gizeh as gz
import moviepy.editor as mpy
import time

new_env = physic_env(cond, mass_list, force_list, init_mouse, T, ig_mode, prior)
# control_vec = {'obj': np.append(np.repeat(0, 60), np.repeat(1, 180)),
#                'x': np.repeat(3, 240), 'y': np.repeat(3, 240)}

# Quick checks used during development (test 10 time frames):
# print("*********", self.bodies[0].position[0])
# print("start")
# state, reward, is_done = new_env.step(0)
# state = new_env.reset()
# print(len(state))
# print(reward)

# Test the whole process: step the environment with random actions
# until the session times out.
# start = time.time()
# print("start", time.strftime("%H:%M:%S", time.localtime()))
for i in range(cond['timeout'] // T):  # integer division for Python 3
    idx = np.random.randint(0, 645)
    states, reward, is_done = new_env.step(idx)
    print(is_done)
    print(reward)

data = new_env.step_data()
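# Minimal timing sketch (an assumption based on the commented-out
# time.time() calls above, not part of the original run): measure how long
# the random-action rollout takes, using the same names as the loop above.
start = time.time()
for i in range(cond['timeout'] // T):
    idx = np.random.randint(0, 645)
    new_env.step(idx)
print("rollout took {:.2f} s".format(time.time() - start))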
    fig = plt.gcf()
    fig.savefig('Qlearning_{}_cum_reward.png'.format(name))

    if args.save_model:
        # Save the network architecture and weights separately.
        model_json = nn.to_json()
        with open('Qlearning_{}.json'.format(name), 'w') as json_file:
            json_file.write(model_json)
        nn.save_weights('Qlearning_{}.h5'.format(name))
        print("Model saved!")
        np.savetxt('Qlearning_{}.txt'.format(name),
                   (rewards, loss, np.cumsum(rewards).tolist()))
        print("Training details saved!")

    plt.show()
    return


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='training a q-function approximator')
    parser.add_argument('--epochs', type=int, action='store',
                        help='number of epochs to train', default=10)
    parser.add_argument('--mode', type=int, action='store',
                        help='type of intrinsic reward, 1 for mass, 2 for force', default=1)
    # Note: argparse's type=bool treats any non-empty string as True,
    # so only the default value is reliable here.
    parser.add_argument('--save_model', type=bool, action='store',
                        help='save trained model or not', default=True)
    parser.add_argument('--sessions', type=int, action='store',
                        help='number of sessions to train per epoch', default=10)
    args = parser.parse_args()
    print(args)

    # Initialize the environment.
    new_env = physic_env(train_cond, mass_list, force_list, init_mouse,
                         T, args.mode, prior, reward_stop)

    # Train.
    train_loop(args)
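# Minimal reload sketch (not part of the original file): the to_json()/
# save_weights() pair above is the Keras save idiom, so this assumes a Keras
# model. The run name, loss, and optimizer are illustrative; reuse whatever
# the training script actually used.
from keras.models import model_from_json

name = 'mass'  # hypothetical run name; must match the files written above
with open('Qlearning_{}.json'.format(name)) as json_file:
    nn = model_from_json(json_file.read())
nn.load_weights('Qlearning_{}.h5'.format(name))
nn.compile(loss='mse', optimizer='adam')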