def test(args=get_args()):
    """Replay one rendered episode with a saved DDPG policy.

    Builds the actor/critic networks, wraps them in a ``DDPGPolicy``, loads
    the weights stored at ``<args.logdir>/<args.task>/ddpg/policy.pth`` and
    steps the environment returned by ``gym_make()`` until it reports
    ``done``, rendering after every step.

    Args:
        args: Parsed options; only ``logdir`` and ``task`` are read here.
            NOTE: the default is evaluated once, when the function is
            defined, not on every call.
    """
    torch.set_num_threads(1)  # a single thread is enough for the networks

    env = gym_make()
    model_path = os.path.join(args.logdir, args.task, 'ddpg/policy.pth')
    layer = [1024, 512, 512, 512]
    # Use the GPU when one is present, otherwise fall back to the CPU so the
    # script also runs on CUDA-less hosts; ``map_location`` below remaps the
    # checkpoint tensors to whichever device was picked.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    state_shape = env.observation_space.shape or env.observation_space.n
    action_shape = env.action_space.shape or env.action_space.n
    action_range = [env.action_space.low, env.action_space.high]

    actor = Actor(
        layer, state_shape, action_shape, action_range, device
    ).to(device)
    critic = Critic(layer, state_shape, action_shape, device).to(device)
    # The optimizers are never stepped here; they are only created because
    # they are passed to the DDPGPolicy constructor.
    actor_optim = torch.optim.Adam(actor.parameters())
    critic_optim = torch.optim.Adam(critic.parameters())

    policy = DDPGPolicy(
        actor, actor_optim, critic, critic_optim,
        action_range=action_range)
    policy.load_state_dict(torch.load(model_path, map_location=device))

    try:
        obs = env.reset()
        env.render()
        print(env.goal)

        done = False
        # Pure inference: no gradients are needed during the rollout.
        with torch.no_grad():
            while not done:
                action, _ = policy.actor(obs.reshape(1, -1), eps=0.01)
                action = action.detach().cpu().numpy()[0]
                obs, _reward, done, _info = env.step(action)
                env.render()
    finally:
        # Release the renderer/window even if the rollout raises.
        env.close()
# NOTE(review): this physical line is a whitespace-collapsed fragment; its
# start and end lie outside the visible chunk. What can be read from it:
#   1. The tail of a per-module initialiser (presumably the body of
#      `weights_init`, which is applied to both networks further down --
#      confirm against the missing `def` line): modules whose class name
#      contains 'Conv' get weights drawn from normal_(0.0, 0.02); modules whose
#      class name contains 'BatchNorm' get weights from normal_(1.0, 0.02) and
#      biases filled with 0.
#   2. Construction of `generator` / `critic` from `opt.latent`,
#      `opt.channels`, `opt.num_filters`, plus one RMSprop optimizer per
#      network using `opt.learning_rate`.
#   3. Device selection ("cuda:0" when CUDA is available, else "cpu"), moving
#      both networks to it, then applying `weights_init` to each.
#   4. Creation of a dataset directory under ROOT_DIR if missing: "/cifar" for
#      opt.dataset == 'cifar10' (the path is also printed), "/bedrooms" for
#      'LSUN'.
#   5. Counters `current_epoch` and `gen_iterations` set to 0, followed by the
#      header of the loop over `opt.n_epochs`; the loop body is not visible.
classname = m.__class__.__name__ if classname.find('Conv') != -1: m.weight.data.normal_(0.0, 0.02) elif classname.find('BatchNorm') != -1: m.weight.data.normal_(1.0, 0.02) m.bias.data.fill_(0) # create the objects for two networks and for the two optimizers generator = Generator(latent=opt.latent, channels=opt.channels, num_filters=opt.num_filters) critic = Critic(channels=opt.channels, num_filters=opt.num_filters) optimizer_G = torch.optim.RMSprop(generator.parameters(), lr=opt.learning_rate) optimizer_C = torch.optim.RMSprop(critic.parameters(), lr=opt.learning_rate) # put the nets on gpu device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") generator, critic = generator.to(device), critic.to(device) generator.apply(weights_init) critic.apply(weights_init) if opt.dataset == 'cifar10': print(ROOT_DIR + "/cifar") if not os.path.isdir(ROOT_DIR + "/cifar"): os.mkdir(ROOT_DIR + "/cifar") elif opt.dataset == 'LSUN': if not os.path.isdir(ROOT_DIR + "/bedrooms"): os.mkdir(ROOT_DIR + "/bedrooms") # start training current_epoch = 0 gen_iterations = 0 for epoch in range(opt.n_epochs):