def generate_test():
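    # Roll out one uncontrolled T-frame episode in the physics environment and
    # return it as a list of per-frame state vectors.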
    mode = 1  # intrinsic-reward mode: 1 for mass, 2 for force
    T = 500   # number of simulation frames in the rollout
    new_env = physic_env(train_cond, mass_list, force_list, init_mouse, T,
                         mode, prior, reward_stop)

    # s_next, r, is_done, r_others = new_env.step(a)

    # Per-frame control inputs (controlled object id and x/y); all zeros here,
    # so no control is applied during the rollout.
    control_vec = {
        'obj': np.repeat(0, T),
        'x': np.repeat(0, T),
        'y': np.repeat(0, T)
    }

    current_cond = new_env.update_condition(new_env.cond['mass'],
                                            new_env.cond['lf'])
    new_env.update_bodies(current_cond)
    local_data = new_env.initial_data()

    for t in range(T):
        local_trajectory = new_env.update_simulate_bodies(
            current_cond, control_vec, t, local_data)
    # Flatten the recorded run: each frame keeps 6 placeholder slots followed by
    # x, y, vx, vy for objects o1-o4.
    trajectory = []
    for f in range(T):
        frame = [0, 0, 0, 0, 0, 0]
        for obj in ('o1', 'o2', 'o3', 'o4'):
            for key in ('x', 'y', 'vx', 'vy'):
                frame.append(local_trajectory[obj][key][f])
        trajectory.append(frame)
    return trajectory
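
# Minimal usage sketch (hedged): each returned frame holds 6 placeholder slots
# followed by x, y, vx, vy for objects o1-o4, i.e. 22 values per frame.
# traj = np.array(generate_test())
# print(traj.shape)   # expected: (500, 22)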
Example #2
from environment import physic_env
import numpy as np
from config import *
import math
import time
import gizeh as gz
import moviepy.editor as mpy

new_env = physic_env(cond, mass_list, force_list, init_mouse, T, ig_mode, prior)
#control_vec = {'obj': np.append(np.repeat(0, 60), np.repeat(1, 180)), 'x':np.repeat(3, 240), 'y':np.repeat(3, 240)}


# Test a few time frames (left commented out):
# state, reward, is_done = new_env.step(0)
# state = new_env.reset()
# print(len(state))
# print(reward)

# Test the whole process and optionally time it:
# start = time.time()
# print("start", time.strftime("%H:%M:%S", time.localtime()))

for i in range(cond['timeout'] // T):  # integer division so range() receives an int
    idx = np.random.randint(0, 645)
    states, reward, is_done = new_env.step(idx)
    print(is_done)
    print(reward)

data = new_env.step_data()
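
# Hedged sketch of how the gizeh/moviepy imports above could be used: render the
# recorded run as a short clip. This assumes new_env.step_data() returns per-object
# traces in the same {'o1': {'x': [...], 'y': [...]}, ...} layout that the
# trajectories above use; WIDTH, HEIGHT, SCALE and FPS are hypothetical
# pixel-mapping constants.
WIDTH, HEIGHT, SCALE, FPS = 600, 400, 100, 24

def make_frame(t_sec):
    f = min(int(t_sec * FPS), T - 1)  # map clip time to a recorded frame index
    surface = gz.Surface(WIDTH, HEIGHT, bg_color=(1, 1, 1))
    for obj, color in zip(('o1', 'o2', 'o3', 'o4'),
                          [(1, 0, 0), (0, 1, 0), (0, 0, 1), (0, 0, 0)]):
        x = data[obj]['x'][f] * SCALE
        y = data[obj]['y'][f] * SCALE
        gz.circle(r=10, xy=(x, y), fill=color).draw(surface)
    return surface.get_npimage()

# clip = mpy.VideoClip(make_frame, duration=T / FPS)
# clip.write_videofile('test_run.mp4', fps=FPS)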
Example #3
        fig = plt.gcf()
        fig.savefig('Qlearning_{}_cum_reward.png'.format(name))

    if args.save_model:
        model_json = nn.to_json()
        with open('Qlearning_{}.json'.format(name), 'w') as json_file:
            json_file.write(model_json)
        nn.save_weights('Qlearning_{}.h5'.format(name))
        print("Model saved!")
        np.savetxt('Qlearning_{}.txt'.format(name), (rewards, loss, np.cumsum(rewards).tolist()))
        print("Training details saved!")

    plt.show()

    return

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='training a q-function approximator')
    parser.add_argument('--epochs', type=int, action='store', help='number of epochs to train', default=10)
    parser.add_argument('--mode', type=int, action='store', help='type of intrinsic reward, 1 for mass, 2 for force', default=1)
    # Note: type=bool would treat any non-empty string as True, so parse the flag text explicitly.
    parser.add_argument('--save_model', type=lambda s: s.lower() in ('1', 'true', 'yes'), action='store', help='save trained model or not', default=True)
    parser.add_argument('--sessions', type=int, action='store', help='number of sessions to train per epoch', default=10)

    args = parser.parse_args()
    print(args)

    # initialize the environment
    new_env = physic_env(train_cond, mass_list, force_list, init_mouse, T, args.mode, prior, reward_stop)
    # train
    train_loop(args)
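
# Example invocation (script name is hypothetical):
#   python train_qlearning.py --epochs 20 --mode 1 --sessions 10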