Пример #1
0
                    metavar='N',
                    help='model updates per simulator step (default: 5)')
# Integer option; the help text describes it as the number of frames to stack.
parser.add_argument(
    '--num-stack', help='number of frames to stack', default=1, type=int)

# String option, empty unless supplied.
# NOTE(review): the name suggests a checkpoint/model suffix while the help
# text talks about resuming training -- confirm the intended meaning.
parser.add_argument(
    '--model-suffix', help='To resume training or not', default="")

# Parse the command line into the `args` namespace used below.
args = parser.parse_args()

# Create the environment named by args.env_name and wrap it in
# NormalizedActions.
env = NormalizedActions(gym.make(args.env_name))

# Logging writer created with its default arguments.
writer = SummaryWriter()

# Seed every random source from the single args.seed value.
env.seed(args.seed)

# The global PyTorch generator is seeded unconditionally.  Previously the
# CUDA branch only called torch.cuda.manual_seed, leaving the CPU generator
# unseeded, so CPU-side random operations differed from run to run even
# with a fixed seed.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    # Kept explicit for the current CUDA device.
    torch.cuda.manual_seed(args.seed)
else:
    device = torch.device("cpu")

np.random.seed(args.seed)

# Observation shape after stacking: the leading dimension of the
# environment's observation shape is multiplied by args.num_stack and the
# remaining dimensions are carried over unchanged.
obs_shape = (
    env.observation_space.shape[0] * args.num_stack,
    *env.observation_space.shape[1:],
)

# A three-dimensional observation shape is flagged as image input.
if len(env.observation_space.shape) == 3:
    image_input = True
Пример #2
0
        json.dump(vars(args), outfile)
    # args.pics decides whether the environment gets the image wrapper.
    cnn = args.pics
    for i_run in range(args.max_num_run):
        logger.important(f"START TRAINING RUN {i_run}")

        # Build the environment: override its _max_episode_steps, wrap it in
        # NormalizedActions and, if requested, in ImageWrapper.
        base_env = gym.make(args.env_name)
        base_env._max_episode_steps = args.max_num_step
        env = NormalizedActions(base_env)
        if cnn:
            env = ImageWrapper(args.img_size, env)

        # Each run uses the base seed offset by its run index, applied to
        # PyTorch, NumPy, the environment and the action-space sampler.
        run_seed = args.seed + i_run
        torch.manual_seed(run_seed)
        np.random.seed(run_seed)
        env.seed(run_seed)
        env.action_space.np_random.seed(run_seed)

        # Agent for this run.
        agent = SAC(args.state_buffer_size, env.action_space, args)

        # TensorboardX writers: one log directory per run, with separate
        # train/ and test/ sub-directories.
        run_dir = 'runs/' + folder + 'run_' + str(i_run)
        writer_train = SummaryWriter(log_dir=run_dir + '/train')
        writer_test = SummaryWriter(log_dir=run_dir + '/test')

        # Replay memory sized by args.replay_size.
        memory = ReplayMemory(args.replay_size)

        # TRAINING LOOP
Пример #3
0
                    help='max episode length (default: 1000)')
# Number of episodes (integer, 1000 by default).
parser.add_argument('--num_episodes', type=int, default=1000, metavar='N',
                    help='number of episodes (default: 1000)')
# The help text used to repeat the --num_episodes wording (copy-paste slip),
# which made `--help` describe this option incorrectly.
parser.add_argument('--hidden_size', type=int, default=128, metavar='N',
                    help='hidden size (default: 128)')
# Model updates performed per simulator step (integer, 5 by default).
parser.add_argument('--updates_per_step', type=int, default=5, metavar='N',
                    help='model updates per simulator step (default: 5)')
# Capacity of the replay buffer (integer, 1000000 by default).
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N',
                    help='size of replay buffer (default: 1000000)')

# Parse the command line into the `args` namespace used below.
args = parser.parse_args()

# Create the environment named by args.env_name and wrap it in
# NormalizedActions.
env = NormalizedActions(gym.make(args.env_name))

# Logging writer created with its default arguments.
writer = SummaryWriter()

# Seed the environment, PyTorch and NumPy from the same args.seed value.
env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

# Both agent classes take identical constructor arguments; args.algo equal
# to "NAF" selects NAF, any other value selects DDPG.
agent = (NAF if args.algo == "NAF" else DDPG)(
    args.gamma, args.tau, args.hidden_size,
    env.observation_space.shape[0], env.action_space)

# Replay memory sized by args.replay_size.
memory = ReplayMemory(args.replay_size)

# Optional noise helpers: each one is built only when its flag is set and
# is None otherwise.
if args.ou_noise:
    ounoise = OUNoise(env.action_space.shape[0])
else:
    ounoise = None

if args.param_noise:
    param_noise = AdaptiveParamNoiseSpec(
        initial_stddev=0.05,
        desired_action_stddev=args.noise_scale,
        adaptation_coefficient=1.05)
else:
    param_noise = None
Пример #4
0
    # The environment key is the first command-line argument.  A missing
    # argument is handled like an unknown key (usage message + exit) rather
    # than crashing with IndexError on sys.argv[1].
    env_key = sys.argv[1] if len(sys.argv) > 1 else None

    # Map each supported key to its settings dictionary.
    args = {'mc': args_mc, 'pd': args_pd, 'll': args_ll}.get(env_key)

    if args is None:
        print('Environment not selected, Please choose from: mc, pd,ll')
        # sys.exit is used instead of the interactive `exit` helper, which
        # is injected by the `site` module and is not guaranteed to exist.
        sys.exit(-1)

    # Create the environment named in the settings and wrap it in
    # NormalizedActions.
    env = NormalizedActions(gym.make(args['env_name']))

    # Seed the environment, PyTorch and NumPy from the configured seed.
    env.seed(args['seed'])
    torch.manual_seed(args['seed'])
    np.random.seed(args['seed'])

    # Build the NAF agent and load its model from models/naf_<env_name>.
    agent = NAF(args['gamma'], args['tau'], args['hidden_size'],
                env.observation_space.shape[0], env.action_space)
    agent.load_model(f'models/naf_{args["env_name"]}')

    replay_buffer = ReplayBuffer(args['replay_size'])

    # Noise helper is built only when the 'ou_noise' setting is truthy.
    ounoise = OUNoise(env.action_space.shape[0]) if args['ou_noise'] else None

    # NOTE(review): run() and plot_results() take no arguments, so they
    # presumably read the names set up above -- verify against their
    # definitions.
    run()

    plot_results()