Example #1
def launch(args):
    # Experiment parameters
    exp_kwargs = {
        'goal_tolerance_parameters': {
            'inc_tol_obs': True,
            'initial_tol': 0.020,
            'final_tol': 0.001,
            'N_ts': 200000,
            'function': 'decay',
            'set_tol': 0
        },
        'relative_q': True,
        'joint_representation': 'trig',
        'resample_joints': False,
        'normalize_obs': False
    }
    # create the environment with the curriculum settings above (relative decay)
    env = gym.make(args.env_name, **exp_kwargs)
    # set random seeds for reproducibility, offset by MPI rank so each worker differs
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    torch.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    if args.cuda:
        torch.cuda.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg_agent(args, env, env_params)
    ddpg_trainer.learn()
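
All seven launch variants rely on the same module-level imports, which this listing omits. A minimal header that would make Example #1 runnable is sketched below; the module path for ddpg_agent is an assumption modeled on common HER/DDPG training repositories, and get_env_params is usually defined in the same script (see the sketch after Example #2).

import random

import gym
import numpy as np
import torch
from mpi4py import MPI

# project-local import; this module path is an assumption
from rl_modules.ddpg_agent import ddpg_agent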
Example #2
def launch(args):
    env = gym.make(args.env_name)
    # set random seeds for reproducibility, offset by MPI rank so each worker differs
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    torch.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    if args.cuda:
        torch.cuda.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    # get the environment parameters
    env_params = get_env_params(env)
    
    # create the agent selected by args.alg and start training
    if args.alg == 'gac':
        # create the GAC agent to interact with the environment
        print("Start GAC...")
        gac_trainer = gac_agent(args, env, env_params)
        gac_trainer.learn()
    elif args.alg == 'sac':
        # create the SAC agent to interact with the environment
        print("Start SAC...")
        sac_trainer = sac_agent(args, env, env_params)
        sac_trainer.learn()
    elif args.alg == 'td3':
        print('Start TD3...')
        td3_trainer = td3_agent(args, env, env_params)
        td3_trainer.learn()
    else:
        # create the ddpg agent to interact with the environment 
        print("Start DDPG...")
        ddpg_trainer = ddpg_agent(args, env, env_params)
        ddpg_trainer.learn()
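
Every example calls get_env_params before constructing an agent (Example #6 also passes args), but the helper itself never appears in the listing. In HER-style training scripts it probes a goal-conditioned environment once and records the shapes the actor and critic need. A minimal sketch, assuming reset() returns the usual goal-environment dict:

def get_env_params(env):
    obs = env.reset()
    # shapes consumed by the networks and the replay buffer
    params = {'obs': obs['observation'].shape[0],
              'goal': obs['desired_goal'].shape[0],
              'action': env.action_space.shape[0],
              'action_max': env.action_space.high[0]}
    # episode horizon, set by gym's TimeLimit wrapper
    params['max_timesteps'] = env._max_episode_steps
    return params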
Example #3
def launch(args):
    # create the environment
    env = gym.make(args.env_name)
    # set random seeds for reproducibility
    env.seed(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    # get the environment parameters
    env_params = get_env_params(env)

    # instantiate the evaluator for the selected algorithm and visualize a
    # saved model; PATH (the checkpoint location) is assumed to be defined
    # at module level
    if args.algo == 'ddpg':
        agent = ddpg_agent(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'sac':
        agent = sac_agent(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'forward':
        agent = dynamics_learner(args, env, env_params)
        agent.eval_dynamics(PATH)
    elif args.algo == 'inverse':
        agent = inverse_dynamics_learner(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'double':
        agent = double_agent(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'share':
        agent = double_agent_share(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'shaping_share':
        agent = double_agent_shaping_share(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'attention':
        agent = double_agent_attention(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'sgg':
        agent = sgg_agent(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'alt':
        agent = alternate_agent(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'br':
        agent = sac_br_agent(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'br_ppo':
        agent = sac_br_agent_ppo(args, env, env_params)
        agent.visualize(PATH)
    elif args.algo == 'skill':
        agent = sac_skill_agent(args, env, env_params)
        agent.visualize(PATH)
    else:
        raise NotImplementedError
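
None of the snippets shows how launch is actually invoked. In MPI-parallel training scripts of this kind, the entry point usually builds the args namespace with argparse and pins each worker to a single thread before the ranks start stepping their environments. A minimal sketch, where get_args is an assumed argparse helper supplying env_name, seed, cuda, and the alg/algo switch:

import os

if __name__ == '__main__':
    # pin each MPI rank to a single thread so workers don't oversubscribe the CPU
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['MKL_NUM_THREADS'] = '1'
    args = get_args()  # assumed argparse helper: env_name, seed, cuda, alg/algo, ...
    launch(args)

Such a script is then started with something like mpirun -np 8 python train.py --env-name FetchReach-v1.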
Example #4
def launch(args):
    # create the environment
    env = gym.make(args.env_name)
    # set random seeds for reproducibility, offset by MPI rank so each worker differs
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    torch.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    if args.cuda:
        torch.cuda.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg_agent(args, env, env_params)
    ddpg_trainer.learn()
Example #5
def launch(args):
    # create the environment (custom sparse-reward touch-sensor block task)
    env = SharedBlockTouchSensorsEnvSparse()
    env = TimeLimit(env, 200)
    # set random seeds for reproducibility, offset by MPI rank so each worker differs
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    torch.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    if args.cuda:
        torch.cuda.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg_agent(args, env, env_params)
    ddpg_trainer.learn()
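
Unlike the other examples, this one instantiates the environment class directly instead of going through gym.make, so the 200-step episode cap is applied by hand with gym's standard TimeLimit wrapper (which also sets the _max_episode_steps attribute that get_env_params, sketched above, reads). The imports this snippet assumes look roughly like the following; the module path for the project-local class is hypothetical:

from gym.wrappers import TimeLimit
# hypothetical module path; SharedBlockTouchSensorsEnvSparse is project-local
from envs.shared_block import SharedBlockTouchSensorsEnvSparse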
Example #6
def launch(args):
    # create the environment
    env = gym.make('gym_multiRL:MultiRL{}'.format(args.env_name))
    # set random seeds for reproducibility, offset by MPI rank so each worker differs
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    torch.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    if args.cuda:
        torch.cuda.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    # get the environment parameters
    env_params = get_env_params(env, args)
    # create the ddpg agent to interact with the environment
    # if args.actor_loss_type=='mgda':
    #     ddpg_trainer = ddpg_agent_mgda(args, env, env_params)
    # else:
    ddpg_trainer = ddpg_agent(args, env, env_params)
    ddpg_trainer.learn()
Example #7
def launch(args):
    # create the environment
    env = gym.make(args.env_name)
    # set random seeds for reproducibility, offset by MPI rank so each worker differs
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    torch.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    if args.cuda:
        torch.cuda.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    # get the environment parameters
    env_params = get_env_params(env)
    # set up TensorBoard logging in a clean directory
    reset_dir('./experiments/log')
    tb_wrt = SummaryWriter('./experiments/log')

    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg_agent(args,
                              env,
                              env_params,
                              tb_wrt,
                              is_her=args.is_her)
    ddpg_trainer.learn()
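
reset_dir is not defined in the snippet; its evident job is to hand SummaryWriter a clean log directory so a new run's event files don't mix with a previous run's. A plausible minimal implementation:

import os
import shutil

def reset_dir(path):
    # wipe any previous run's event files, then recreate the directory
    if os.path.isdir(path):
        shutil.rmtree(path)
    os.makedirs(path)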