Example #1
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--env_name', type=str,
                        default='HalfCheetah-v2')  #HalfCheetah-v2
    # Experiment meta-params
    parser.add_argument('--exp_name', type=str, default='mb_mpc')
    parser.add_argument('--seed', type=int, default=3)
    parser.add_argument('--render', action='store_true')
    # Training args
    parser.add_argument('--learning_rate', '-lr', type=float, default=1e-3)
    parser.add_argument('--onpol_iters', '-n', type=int,
                        default=5)  # Aggregation iters 10
    parser.add_argument('--dyn_iters', '-nd', type=int,
                        default=60)  # epochs 60
    parser.add_argument('--batch_size', '-b', type=int, default=512)
    # Data collection
    parser.add_argument('--random_paths', '-r', type=int,
                        default=700)  # random path nums 700
    parser.add_argument('--onpol_paths', '-d', type=int,
                        default=10)  # mpc path nums   10
    parser.add_argument('--ep_len', '-ep', type=int,
                        default=1000)  # 1000   path length   1000
    # Neural network architecture args
    parser.add_argument('--n_layers', '-l', type=int, default=2)
    parser.add_argument('--size', '-s', type=int, default=500)
    # MPC Controller
    parser.add_argument('--mpc_horizon', '-m', type=int,
                        default=15)  # mpc simulation H  10
    parser.add_argument('--simulated_paths', '-sp', type=int,
                        default=10000)  # mpc  candidate  K 100
    args = parser.parse_args()

    print(args)
    # Set seed
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # Make data directory if it does not already exist

    # Make env
    if args.env_name == 'HalfCheetah-v2':
        env = HalfCheetahEnvNew()
        cost_fn = cheetah_cost_fn

    env_name = args.env_name  # HalfCheetah-v2  My3LineDirect-v1
    cost_fn = cheetah_cost_fn
    env = gym.make(env_name)
    # env.set_goals(45 * 3.14 / 180.0)  # the angle must be converted to radians

    logdir = configure_log_dir(logname=env_name, txt='-train')
    utils.LOG_PATH = logdir

    with open(logdir + '/info.txt', 'wt') as f:
        print('Hello World!\n', file=f)
        print(args, file=f)

    train(
        env=env,
        cost_fn=cost_fn,
        logdir=logdir,
        render=args.render,
        learning_rate=args.learning_rate,
        onpol_iters=args.onpol_iters,
        dynamics_iters=args.dyn_iters,
        batch_size=args.batch_size,
        num_paths_random=args.random_paths,
        num_paths_onpol=args.onpol_paths,
        num_simulated_paths=args.simulated_paths,
        env_horizon=args.ep_len,
        mpc_horizon=args.mpc_horizon,
        n_layers=args.n_layers,
        size=args.size,
        activation='relu',
        output_activation=None,
    )
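All of these examples obtain their output directory from a configure_log_dir helper that is never shown. A minimal sketch of what such a helper could look like, assuming it only builds an (optionally timestamped) run directory from the arguments used in these snippets and returns its path; the real project's implementation may differ:

import os
import time

def configure_log_dir(logname, txt='', No_time=False, log_group='log'):
    # Optionally append a timestamp so repeated runs do not collide.
    stamp = '' if No_time else '-' + time.strftime('%Y-%m-%d_%H-%M-%S')
    path = os.path.join(log_group, logname + txt + stamp)
    os.makedirs(path, exist_ok=True)
    return path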
Example #2
parser.add_argument('--pop_size', type=int, default=8)
parser.add_argument('--max_gen', type=int, default=1000)
parser.add_argument('--CXPB', type=float, default=0.8)
parser.add_argument('--MUTPB', type=float, default=0.1)
parser.add_argument('--gain_max', type=float, default=2.0)
parser.add_argument('--speed_max', type=float, default=2.0)
args = parser.parse_args()

env_name = args.env_name
env = gym.make(env_name)
log_name = 'PSO4_open'

# Set the logging variables
# This also creates a new log file
# Create log files
log_dir = configure_log_dir(env_name, txt=log_name, No_time=False)
logging_output(log_dir)
logger = LoggerCsv(log_dir, csvname='log_results')
results_IO = IO(os.path.join(log_dir, 'results.pkl'))
args_IO = IO(os.path.join(log_dir, 'args.pkl')).to_pickle(args)


def parmeter_generate(pmin, pmax):
    parm_list = [random.uniform(pmin, pmax) for _ in range(27)]

    return parm_list


def generate(size, pmin, pmax, smin, smax):
    part = creator.Particle(parmeter_generate(pmin, pmax))
    part.speed = [random.uniform(smin, smax) for _ in range(size)]
    part.smin, part.smax = smin, smax  # keep the speed limits on the particle (standard DEAP PSO pattern)
    return part
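The generate function above relies on a creator.Particle class that must be registered before use; the snippet does not include that setup. A minimal sketch using the standard DEAP creator API (the single maximizing fitness weight is an assumption):

from deap import base, creator

# Register a fitness and a list-based particle carrying a speed vector and speed limits.
creator.create('FitnessMax', base.Fitness, weights=(1.0,))
creator.create('Particle', list, fitness=creator.FitnessMax,
               speed=list, smin=None, smax=None)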
Example #3
            CPG_parm_num,
        ]
else:
    raise NotImplementedError("env: {} task is not implemented.".format(args.env_name))

env = gym.make(env_name)
log_name = args.env_name + '_PSO_' + args.task_mode
evaluate_fun = partial(oscillator_nw,
                       env_name=env_name,
                       max_time=args.max_time,
                       fitness_option=args.fitness_mode)

# Create log files
exp_group_dir = args.exp_group_dir
log_dir = configure_log_dir(env_name,
                            txt=log_name,
                            No_time=False,
                            log_group=exp_group_dir)
logging_output(log_dir)
logger = LoggerCsv(log_dir, csvname='log_results')
results_IO = IO(os.path.join(log_dir, 'results.pkl'))
args_IO = IO(os.path.join(log_dir, 'args.pkl')).to_pickle(args)

gain_max = args.gain_max
bias_max = args.bias_max
phase_max = args.phase_max

log.info('[System] parameters: {}'.format(args))
log.info('*********************************************')
log.info('ENV : {}     task_mode: {}'.format(env_name, task_mode))
log.info('ENV : {}     fitness: {}'.format(env_name, args.fitness_mode))
log.info('ENV : {}     gain_max: {}'.format(env_name, gain_max))
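Examples #2 and #3 persist run data through an IO helper (results_IO, args_IO) whose definition is not shown. A minimal pickle-backed sketch exposing the to_pickle call used above; the read_pickle method and everything else about the class are assumptions:

import pickle

class IO:
    def __init__(self, path):
        self.path = path

    def to_pickle(self, obj):
        # Serialize obj to self.path and return self so calls can be chained.
        with open(self.path, 'wb') as f:
            pickle.dump(obj, f)
        return self

    def read_pickle(self):
        # Load and return whatever was pickled earlier (assumed counterpart).
        with open(self.path, 'rb') as f:
            return pickle.load(f)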
Example #4
def main(args):
    tf.set_random_seed(args.seed)
    np.random.seed(args.seed)

    env_name = args.env_name  # HalfCheetah-v2  My3LineDirect-v1
    print(env_name)

    if args.env_name == 'HalfCheetahEnvDisableEnv-v0':
        cost_fn = cheetah_cost_fn
        sample_task_fun = np.random.randint
    elif args.env_name == 'HalfCheetahVaryingEnv-v0':
        cost_fn = cheetah_cost_fn
        sample_task_fun = np.random.uniform
    else:
        raise ValueError('Unsupported env: {}'.format(args.env_name))

    env = gym.make(env_name)
    dim_input = env.observation_space.shape[0] + env.action_space.shape[0]
    dim_output = env.observation_space.shape[0]

    logdir = configure_log_dir(logname=env_name, txt=args.note)
    # save args parameters
    with open(logdir + '/info.txt', 'wt') as f:
        print('Hello World!\n', file=f)
        print(args, file=f)

    mpc_horizon = args.mpc_horizon
    num_simulated_paths = args.simulated_paths  #10000

    dyn_model = Dynamics(
        args.env_name,
        args.NumOfExp,
        args.model_type,
        args.loss_type,
        dim_input,
        dim_output,
        beta=args.beta,
        max_epochs=args.max_epochs,
        is_train=args.is_train,
        norm=args.norm,
        task_Note=args.note,
        restore_checkpoint=args.restore_checkpoint,
        restore_dir=args.restore_dir,
        logdir=logdir)

    mpc_controller = MPCcontroller(
        env=env,
        dyn_model=dyn_model,
        horizon=mpc_horizon,
        cost_fn=cost_fn,
        num_simulated_paths=num_simulated_paths,
    )
    logger = Logger(logdir, csvname='log')

    num_itr = args.num_itr
    experiences, costs = [], []
    print('MPC is beginning...')
    for itr in range(num_itr):
        reward, model_loss_mean = rollout(
            env,
            mpc_controller,
            task_goal=args.task_goal,
            dyn_model=dyn_model,
            experiences=experiences,
            NumOfExp=args.NumOfExp,
            horizon=args.horizon,
            cost_fn=cheetah_cost_fn,
            render=False,
            verbose=False,
            save_video=False,
            ignore_done=True,
        )

        #print(time.asctime( time.localtime(time.time()) ), ' itr :', itr, 'Average reward :' , cost)
        log.infov(
            "Itr {}/{} Accumulated Reward: {:.4f}  Model loss mean:{:.4f}".
            format(itr, num_itr, reward, model_loss_mean))

        logger.log({
            'itr': itr,
            'Accumulated Reward': reward,
            'Model loss mean': model_loss_mean,
        })

    print('MPC is over....')

    logger.write(display=False)
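The MPCcontroller here is configured with a learned dynamics model, a horizon, a cost function, and num_simulated_paths, which points at random-shooting MPC. A minimal sketch of that idea, assuming a hypothetical dyn_model.predict(states, actions) batch API and a cost function that broadcasts over the batch; the real controller's internals may differ:

import numpy as np

def random_shooting_action(env, dyn_model, state, cost_fn, horizon=15, num_paths=10000):
    # Sample num_paths random action sequences of length horizon within the action bounds.
    act_dim = env.action_space.shape[0]
    actions = np.random.uniform(env.action_space.low, env.action_space.high,
                                size=(horizon, num_paths, act_dim))

    states = np.tile(state, (num_paths, 1))
    total_cost = np.zeros(num_paths)
    for t in range(horizon):
        next_states = dyn_model.predict(states, actions[t])    # hypothetical batch API
        total_cost += cost_fn(states, actions[t], next_states)  # assumes batched cost
        states = next_states

    # Execute only the first action of the cheapest imagined trajectory.
    return actions[0, np.argmin(total_cost)]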
Example #5
epoch_size = 1000  #1000
learning_rate = 0.0001

#DAGGER
n_episode = 4
steps = 1000  # maximum step for a game
dagger_epoch_size = 1000  #1000
dagger_batch_size = 1024  #

#MPC
dyn_model = torch.load('data/best_A01_net.pkl')  #net.pkl
cost_fn = cheetah_cost_fn
mpc_horizon = 15
num_simulated_paths = 10000  # 10000

logdir = configure_log_dir(logname=env_name, txt='-Dagger-scale')


def compute_normalization(data):
    """
    Take in a dataset and compute the means and stds.
    Return 6 elements: mean of s_t, std of s_t, mean of (s_t+1 - s_t),
    std of (s_t+1 - s_t), mean of actions, std of actions.

    X_scaled = scaler.transform(X)
    X_inv = scaler.inverse_transform(X_scaled)
    """
    # Here a fitted StandardScaler is returned instead of the six statistics;
    # callers use scaler.transform / scaler.inverse_transform on their data.
    scaler = preprocessing.StandardScaler().fit(data)

    return scaler
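The docstring asks for six statistics while the body returns a fitted scaler. For reference, a sketch of computing those six values directly, assuming the dataset has already been split into arrays of states, next states, and actions (the names below are hypothetical):

import numpy as np

def compute_normalization_stats(states, next_states, actions):
    # Means and stds of s_t, (s_t+1 - s_t), and a_t, per dimension.
    deltas = next_states - states
    return (np.mean(states, axis=0), np.std(states, axis=0),
            np.mean(deltas, axis=0), np.std(deltas, axis=0),
            np.mean(actions, axis=0), np.std(actions, axis=0))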
Example #6
epoch_size = 500  # 1000
learning_rate = 0.0001

# DAGGER
n_episode = 10   # number of rollouts
steps = 10000    # model-based rollout length
dagger_epoch_size = 1000  # 1000
dagger_batch_size = 1024

# MPC
dyn_model = torch.load('data/net.pkl')
cost_fn = cheetah_cost_fn
mpc_horizon = 15
num_simulated_paths = 10000  # 10000

logdir = configure_log_dir(logname=env_name, txt='-Test_policy')


def compute_normalization(data):
    """
    Take in a dataset and compute the means and stds.
    Return 6 elements: mean of s_t, std of s_t, mean of (s_t+1 - s_t),
    std of (s_t+1 - s_t), mean of actions, std of actions.

    X_scaled = scaler.transform(X)
    X_inv = scaler.inverse_transform(X_scaled)
    """
    # As in Example #5, a fitted StandardScaler is returned instead of the six statistics.
    scaler = preprocessing.StandardScaler().fit(data)

    return scaler
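The DAGGER hyperparameters at the top of this example (n_episode, steps, dagger_epoch_size, dagger_batch_size) imply an aggregate-and-refit loop that the snippet does not show. A minimal sketch of the idea, where collect_rollout and fit_dynamics are hypothetical helpers standing in for the project's actual functions, and the mapping of the epoch/batch settings onto training is an assumption:

# Aggregate on-policy data with the MPC policy and refit the dynamics model.
dataset = []
for episode in range(n_episode):
    # Run MPC (assumed to be built from dyn_model, cost_fn, mpc_horizon,
    # num_simulated_paths) for up to `steps` steps and record (s, a, s') transitions.
    dataset += collect_rollout(env, mpc_controller, max_steps=steps)
    # Refit the learned model on everything gathered so far.
    fit_dynamics(dyn_model, dataset,
                 epochs=dagger_epoch_size, batch_size=dagger_batch_size)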
Example #7
#(x_train,y_train),(x_test,y_test) = load_data('log-test1.csv', test_percentage = 1,data_num =1000)
(x_train, y_train), (x_test, y_test) = load_data('log-test1.csv',
                                                 test_percentage=1,
                                                 data_num=1000)
num_predict = 1000

# reload model
dyn_model = torch.load('net.pkl')

states_eval = predict_error_scaled(dyn_model,
                                   x_test,
                                   y_test,
                                   lengthOfRollout=num_predict)

# Create log files
logdir = configure_log_dir(logname=env_name, txt='ModelTest')

# save traj of evaluation
logger = Logger(logdir, csvname='log_test')
trajectory = {}
tra_name = [
    's1-qpos1', 's2-qpos2', 's3-qpos3', 's4-qpos4', 's5-qpos5', 's6-qpos6',
    's7-qpos7', 's8-qpos8', 's9-qvel0', 's10-qvel1', 's11-qvel2', 's12-qvel3',
    's13-qvel4', 's14-qvel5', 's15-qvel6', 's16-qvel7', 's17-qvel8',
    's18-com0', 's19-com1', 's20-com2'
]
for j in range(states_eval.shape[0]):
    for i in range(states_eval.shape[1]):
        trajectory[tra_name[i]] = states_eval[j][i]
    logger.log(trajectory)
    logger.write(display=False)
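predict_error_scaled is not defined in this snippet; its arguments suggest it rolls the learned model forward open-loop and compares the predictions against the recorded states. A minimal sketch of that kind of evaluation, assuming dyn_model is a callable torch module mapping a scaled (state, action) vector to the scaled next state and that each row of x_test is a concatenated state-action pair (all names, shapes, and the scaling convention are assumptions):

import numpy as np
import torch

def open_loop_prediction(dyn_model, x_test, lengthOfRollout, state_dim):
    # Start from the first recorded state and keep feeding predictions back in,
    # while always using the actions that were actually taken.
    state = np.array(x_test[0, :state_dim], dtype=np.float32)
    predicted = []
    for t in range(lengthOfRollout):
        action = np.array(x_test[t, state_dim:], dtype=np.float32)
        inp = torch.from_numpy(np.concatenate([state, action])).unsqueeze(0)
        with torch.no_grad():
            state = dyn_model(inp).squeeze(0).numpy()
        predicted.append(state)
    return np.array(predicted)  # one predicted state per step, to compare with y_test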